plus sc10x

i.P06 and P30.PBS
check DAM signature

  1. P30.PBS and P30.LPS
    check TDT difference/shift
load dependancies

cleaning up

processed obj

GEX.seur <- readRDS("../Spp1tdt.GEX.seur.sort1006.rds")
GEX.seur
## An object of class Seurat 
## 17900 features across 23829 samples within 1 assay 
## Active assay: RNA (17900 features, 1246 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
color.FB <- ggsci::pal_igv("default")(49)[c(8,32,40,
                                            34,26,33,28,
                                            2,43,18)]
color.FBraw <-  c("#FF6C91","lightgrey",color.FB)
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.info", cols = color.FB)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "preAnno")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FB,
        pt.size = 0.1, group.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FB,
        pt.size = 0, group.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "age") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "age") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb", group.by = "age")
plota + plotb + plotc

filtering

GEX.seur <- subset(GEX.seur, subset= preAnno %in% c("Microglia") & DoubletFinder0.05 == "Singlet")
GEX.seur
## An object of class Seurat 
## 17900 features across 22261 samples within 1 assay 
## Active assay: RNA (17900 features, 1246 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.info", cols = color.FB)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "preAnno")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FB,
        pt.size = 0.1, group.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FB,
        pt.size = 0, group.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "age") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "age") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb", group.by = "age")
plota + plotb + plotc

subset1

GEX.seur <- subset(GEX.seur, subset= age %in% c("P06","P30.PBS"))
GEX.seur
## An object of class Seurat 
## 17900 features across 13001 samples within 1 assay 
## Active assay: RNA (17900 features, 1246 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
color.FB1 <- color.FB[c(1:3,8:10)]
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.info", cols = color.FB1)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "preAnno")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FB1,
        pt.size = 0.1, group.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FB1,
        pt.size = 0, group.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "age") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "age") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb", group.by = "age")
plota + plotb + plotc

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0.05, group.by = "sort_clusters")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "sort_clusters")

advanced

GEX.seur <- subset(GEX.seur, subset= percent.mt < 7.5 & nFeature_RNA > 1000 & nFeature_RNA < 3600 & nCount_RNA < 12000)
GEX.seur
## An object of class Seurat 
## 17900 features across 12877 samples within 1 assay 
## Active assay: RNA (17900 features, 1246 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.info", cols = color.FB1)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "preAnno")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FB1,
        pt.size = 0.1, group.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FB1,
        pt.size = 0, group.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "age") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "age") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb", group.by = "age")
plota + plotb + plotc

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0.05, group.by = "sort_clusters")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "sort_clusters")

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,3800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(GEX.seur$FB.info), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+245,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

subset1

re-clustering

GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 300)
##   [1] "Ccl4"          "Hist1h1b"      "Mki67"         "Ccl3"         
##   [5] "Top2a"         "Apoe"          "Hist1h2ap"     "Xist"         
##   [9] "Cxcl10"        "Hist1h2ae"     "Cenpf"         "Spp1"         
##  [13] "Apoc1"         "Ccl5"          "Hmgb2"         "Ube2c"        
##  [17] "Prc1"          "Stmn1"         "Egr1"          "Lpl"          
##  [21] "Ccl12"         "Fn1"           "Ccl2"          "Pclaf"        
##  [25] "Hist1h3c"      "Ifi27l2a"      "Hmmr"          "Fabp5"        
##  [29] "Nusap1"        "Ifit3"         "Ms4a7"         "Hist1h2ab"    
##  [33] "Lgals3"        "Cd74"          "Rsad2"         "Cxcl9"        
##  [37] "Cenpa"         "Birc5"         "Ifitm3"        "Cxcl13"       
##  [41] "Iigp1"         "Kif11"         "Egr3"          "Knl1"         
##  [45] "Atf3"          "Cenpe"         "Ccl7"          "H2afx"        
##  [49] "Aspm"          "Cdca8"         "Pbk"           "Hist1h1e"     
##  [53] "Tpx2"          "Ifit1"         "Smc4"          "Arg1"         
##  [57] "Smc2"          "Ifit2"         "Olfr1369-ps1"  "Apoc4"        
##  [61] "Il1b"          "Kif23"         "H2afz"         "Cdca3"        
##  [65] "Hist1h4d"      "Esco2"         "Hist2h2ac"     "Cd72"         
##  [69] "Kif15"         "Lyz2"          "Rrm2"          "Gdf15"        
##  [73] "Cdkn1a"        "Fxyd2"         "Isg15"         "Ccr1"         
##  [77] "Mis18bp1"      "Gm42047"       "Dennd5b"       "Spc24"        
##  [81] "Ftl1"          "Plk1"          "Gm26917"       "Cdk1"         
##  [85] "Ccnb2"         "Cd52"          "Racgap1"       "Lockd"        
##  [89] "Pf4"           "Olfr889"       "Dqx1"          "Wfdc17"       
##  [93] "Kif20b"        "Ccnb1"         "Csf1"          "Egr2"         
##  [97] "H2-Ab1"        "Ckap2l"        "Spc25"         "Tk1"          
## [101] "Cd63"          "Hist1h1a"      "Hist1h1d"      "Aldh1a3"      
## [105] "Ccna2"         "Pou4f1"        "Gadd45b"       "Plac8"        
## [109] "Loxl2"         "Hba-a2"        "Anln"          "Sgo2a"        
## [113] "Igf1"          "Hba-a1"        "Cybb"          "Sparcl1"      
## [117] "Ifi204"        "H2-Aa"         "Mt1"           "Ndc80"        
## [121] "Mmp12"         "Clspn"         "Neil3"         "Bub1b"        
## [125] "Cxcl14"        "Slfn5"         "Cdc20"         "Mrc1"         
## [129] "Nuf2"          "Nek2"          "Atad2"         "Sct"          
## [133] "Slfn1"         "Bub1"          "Ccl6"          "Clec12a"      
## [137] "Hells"         "Lgals1"        "Kcnq1ot1"      "Ifit3b"       
## [141] "Ccnf"          "Cdca2"         "Gm43409"       "Cks1b"        
## [145] "Cd36"          "Plp1"          "Lmnb1"         "4930447C04Rik"
## [149] "Aurkb"         "H2-Eb1"        "Hist1h3e"      "Tubb4b"       
## [153] "Tmpo"          "Ccne2"         "Cks2"          "Gm8251"       
## [157] "Dtl"           "Slc1a2"        "Crybb1"        "Sgo1"         
## [161] "Fbxo5"         "Rhox5"         "Shcbp1"        "Ch25h"        
## [165] "Melk"          "Tsix"          "Hist1h2af"     "Rad51ap1"     
## [169] "Lig1"          "H2afv"         "Incenp"        "Il1rn"        
## [173] "Tubb5"         "Gm50255"       "Ptprcap"       "Cdkn3"        
## [177] "Ezh2"          "Gfod2"         "Plk2"          "Fth1"         
## [181] "Spag5"         "Rad51"         "Pimreg"        "Hist1h3i"     
## [185] "Cd83"          "Tmem26"        "Cep55"         "Kif20a"       
## [189] "Diaph3"        "Ckap2"         "Dlgap5"        "Kif4"         
## [193] "Myl4"          "Cit"           "Rps19"         "Trim59"       
## [197] "Slc7a11"       "Ier3"          "Ncapg"         "Emp3"         
## [201] "Knstrn"        "Kif22"         "mt-Nd1"        "C4b"          
## [205] "Pcna"          "Gm26885"       "Sox11"         "Ccl9"         
## [209] "Cd28"          "Nefm"          "Ifnb1"         "Diras2"       
## [213] "Tceal5"        "Gm10800"       "Cst7"          "Ly6a"         
## [217] "Ccdc34"        "Tuba1b"        "Lilrb4a"       "Prr11"        
## [221] "Wdcp"          "Dut"           "Brca1"         "Rpl32"        
## [225] "Ptma"          "Ptprz1"        "1500002C15Rik" "Mif"          
## [229] "E2f8"          "B930036N10Rik" "Ifi211"        "Troap"        
## [233] "Rps2"          "Rps26"         "mt-Co1"        "Ncapg2"       
## [237] "Aurka"         "H2-Q7"         "Gnaz"          "Hmox1"        
## [241] "Mcemp1"        "Ifi30"         "Hirip3"        "Cbx5"         
## [245] "Apoc2"         "E2f7"          "Uhrf1"         "Gpnmb"        
## [249] "Ptcra"         "Gas2l3"        "Rpsa"          "Prdx1"        
## [253] "Fos"           "Rplp0"         "Neb"           "Ctsb"         
## [257] "Pcp4"          "Nes"           "Syde1"         "Dennd3"       
## [261] "Rrm1"          "Cdk19os"       "Rnase2a"       "Scn2a"        
## [265] "Ifitm7"        "Cdc25c"        "Cd9"           "Sag"          
## [269] "Ctla2a"        "Ptn"           "Cp"            "Ly75"         
## [273] "Gjc1"          "Cfb"           "Mt2"           "Rps20"        
## [277] "Hist1h2bj"     "Hist2h2bb"     "Kif14"         "Stil"         
## [281] "Hist1h2bn"     "Cobll1"        "Gm47541"       "mt-Atp6"      
## [285] "Id2"           "Bst2"          "Ifi44"         "Rbm3"         
## [289] "Ifi207"        "B230312C02Rik" "Ms4a6c"        "Oasl2"        
## [293] "Rps12"         "Rph3a"         "3830403N18Rik" "Gm34455"      
## [297] "Igkc"          "Cenpm"         "Hmgn2"         "Rpl41"

PCA

# exclude MT genes  and more 
#   add sex-related Xist/Tsix

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|Egr1|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AC|^AI|^AA|^AW|^AY|^BC|^Gm|^Hist|Rik$|-ps|Xist|Tsix|^Ifi|^Isg|^Mcm",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)

VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Pclaf, Knl1, Prc1, Pbk, Spc24, Kif15, Lockd, Neil3, Esco2, Tk1 
##     Ccna2, Racgap1, Bub1b, Stmn1, Sgo2a, Spc25, Cit, Shcbp1, Mis18bp1, Aspm 
##     Ccnf, Ncapg, Knstrn, Ccnb1, Plk1, Cenpm, Sgo1, H2afx, Kif4, Trim59 
## Negative:  Ctsd, Pmp22, Slc1a3, Fam102b, Lrba, Thrsp, Ddit4, Sox4, Cd9, Pmepa1 
##     Csmd3, Upk1b, Hpgd, Abca1, Il7r, Tent5c, Ccl9, Inpp4b, Zbtb20, Tnfrsf17 
##     Sgk1, Syngr1, Arid5b, Itpr2, Tent5a, Plxna4os1, Slc12a2, Ccl6, Pitpnc1, Klrd1 
## PC_ 2 
## Positive:  Ctsd, Pmp22, Fam102b, Lrba, Slc1a3, Thrsp, Il7r, Csmd3, Cd9, Upk1b 
##     Prc1, Pbk, Pmepa1, Zbtb20, Pclaf, Ddit4, Tent5c, Esco2, Knl1, Neil3 
##     Ccna2, Ccnf, Spc25, Kif15, Sgo2a, Lockd, Tjp1, Spc24, Fbxo5, Cadm1 
## Negative:  Tpt1, Fau, Tmsb4x, Eef1a1, Rack1, Apoe, Cfl1, Rbm3, Eef1b2, Cd52 
##     Eif3f, Uba52, Gas5, Cox4i1, Zfas1, Uqcrh, Igfbp4, Ctsl, Atp5g2, Emp3 
##     Pabpc1, Ftl1, Atp5e, Npm1, Cox6c, Ucp2, Nme2, Nsa2, Hint1, Gatm 
## PC_ 3 
## Positive:  Fau, Crybb1, Ramp1, Tpt1, Hpgd, Igfbp4, Arsb, Lst1, Tmsb4x, Eif3f 
##     Apoe, Sox4, Eef1a1, Lrp1, Uqcrh, Lsp1, Gnas, Sgk1, Hmgb1, Pmepa1 
##     Mrc1, Slc12a2, Cryba4, H2afv, Npnt, Uba52, Zfas1, Zfp36l1, Serpinf1, Cbfa2t3 
## Negative:  Ccl12, Bst2, Srgn, Lgals3bp, Fth1, Sdf2l1, Slfn2, Tspo, Ctsz, Ms4a6c 
##     Cd63, Fcgr4, Rtp4, C5ar1, Gpr84, Trim30a, Ccl2, Gapdh, Slfn5, Nme2 
##     Stat1, Fgl2, Xaf1, Parp14, Calm1, Cxcl16, Sdc3, Rnf213, Oasl2, Zbp1 
## PC_ 4 
## Positive:  Ccr5, Nav3, Gbp7, Slfn2, Igfbp4, Ccl12, Trim30a, Ccnd1, Eif2ak2, Parp14 
##     Rtp4, Zfp36l1, Serpinf1, Slfn5, Fam111a, Atrx, Camk2n1, Stat1, Xaf1, Klf7 
##     Crybb1, Rnf213, Oasl2, Gas5, Myh9, Tjp1, Sp100, Herc6, Diaph2, Ddx58 
## Negative:  Ctsd, Cd9, Ctsb, Ftl1, Ctsz, Cd63, Mt1, Atp6v0c, Lyz2, Cadm1 
##     Cd83, Pmp22, Igf1, Ank, Apoc1, Cstb, Anxa5, Syngr1, Lgals3, Scd2 
##     Atf3, Ccl3, Apoe, Fth1, Plaur, Itgax, Lpl, Abcg1, Mif, Fabp5 
## PC_ 5 
## Positive:  Macf1, Lig1, Slc12a2, Hnrnpa3, Ncl, Lrp1, Ptma, Dhfr, Myh9, Nes 
##     Scaf11, Plek, Cdk6, Ccl9, Sox4, Ccl6, Dut, Dst, Rif1, Prrc2c 
##     Ank, Smarca5, Lpl, Nsd2, Atrx, Ttc3, Eif3a, Dnmt1, Pole, Tjp1 
## Negative:  Tmem176a, Apoe, Apoc1, H2-D1, Cdkn3, H2-K1, Fth1, Ccnb1, Knstrn, Plk1 
##     Cd52, Aspm, Fau, Racgap1, Lyz2, Kif20a, Prc1, Cfb, Apoc2, Cyp4f18 
##     Lockd, Ccna2, Tspo, Prr11, Cdkn2c, Bub1b, Kif22, Rnase6, Ccr1, H2-Q7
length(VariableFeatures(GEX.seur))
## [1] 1090
head(VariableFeatures(GEX.seur),500)
##   [1] "Ccl4"      "Ccl3"      "Apoe"      "Cxcl10"    "Spp1"      "Apoc1"    
##   [7] "Ccl5"      "Prc1"      "Stmn1"     "Lpl"       "Ccl12"     "Fn1"      
##  [13] "Ccl2"      "Pclaf"     "Fabp5"     "Ms4a7"     "Lgals3"    "Cd74"     
##  [19] "Rsad2"     "Cxcl9"     "Cxcl13"    "Iigp1"     "Egr3"      "Knl1"     
##  [25] "Atf3"      "Ccl7"      "H2afx"     "Aspm"      "Pbk"       "Arg1"     
##  [31] "Smc2"      "Apoc4"     "Il1b"      "H2afz"     "Esco2"     "Cd72"     
##  [37] "Kif15"     "Lyz2"      "Gdf15"     "Cdkn1a"    "Fxyd2"     "Ccr1"     
##  [43] "Mis18bp1"  "Dennd5b"   "Spc24"     "Ftl1"      "Plk1"      "Cd52"     
##  [49] "Racgap1"   "Lockd"     "Pf4"       "Olfr889"   "Dqx1"      "Wfdc17"   
##  [55] "Ccnb1"     "Csf1"      "Egr2"      "H2-Ab1"    "Spc25"     "Tk1"      
##  [61] "Cd63"      "Aldh1a3"   "Ccna2"     "Pou4f1"    "Gadd45b"   "Plac8"    
##  [67] "Loxl2"     "Sgo2a"     "Igf1"      "Cybb"      "Sparcl1"   "H2-Aa"    
##  [73] "Mt1"       "Mmp12"     "Neil3"     "Bub1b"     "Cxcl14"    "Slfn5"    
##  [79] "Mrc1"      "Sct"       "Slfn1"     "Ccl6"      "Clec12a"   "Lgals1"   
##  [85] "Kcnq1ot1"  "Ccnf"      "Cd36"      "Plp1"      "Lmnb1"     "H2-Eb1"   
##  [91] "Slc1a2"    "Crybb1"    "Sgo1"      "Fbxo5"     "Rhox5"     "Shcbp1"   
##  [97] "Ch25h"     "Melk"      "Lig1"      "H2afv"     "Incenp"    "Il1rn"    
## [103] "Tubb5"     "Ptprcap"   "Cdkn3"     "Ezh2"      "Gfod2"     "Plk2"     
## [109] "Fth1"      "Spag5"     "Cd83"      "Tmem26"    "Cep55"     "Kif20a"   
## [115] "Diaph3"    "Kif4"      "Myl4"      "Cit"       "Trim59"    "Slc7a11"  
## [121] "Ier3"      "Ncapg"     "Emp3"      "Knstrn"    "Kif22"     "C4b"      
## [127] "Sox11"     "Ccl9"      "Cd28"      "Nefm"      "Ifnb1"     "Diras2"   
## [133] "Tceal5"    "Cst7"      "Ly6a"      "Ccdc34"    "Tuba1b"    "Lilrb4a"  
## [139] "Prr11"     "Wdcp"      "Dut"       "Brca1"     "Ptma"      "Ptprz1"   
## [145] "Mif"       "Troap"     "Ncapg2"    "H2-Q7"     "Gnaz"      "Hmox1"    
## [151] "Mcemp1"    "Hirip3"    "Apoc2"     "E2f7"      "Gpnmb"     "Ptcra"    
## [157] "Prdx1"     "Neb"       "Ctsb"      "Pcp4"      "Nes"       "Syde1"    
## [163] "Dennd3"    "Cdk19os"   "Rnase2a"   "Scn2a"     "Cd9"       "Sag"      
## [169] "Ctla2a"    "Ptn"       "Cp"        "Ly75"      "Gjc1"      "Cfb"      
## [175] "Mt2"       "Kif14"     "Stil"      "Cobll1"    "Id2"       "Bst2"     
## [181] "Rbm3"      "Ms4a6c"    "Oasl2"     "Rph3a"     "Cenpm"     "Hmgn2"    
## [187] "Klf2"      "Jsrp1"     "Gbp2"      "Ank"       "Zwilch"    "Bora"     
## [193] "Dnase1l3"  "C3"        "Gbp4"      "Brip1"     "Cip2a"     "Meis2"    
## [199] "Kif21a"    "Fabp3"     "Lgals3bp"  "Tuba1c"    "Rtp4"      "Arl6ip1"  
## [205] "Klf1"      "Sst"       "Ocstamp"   "Sox2"      "Adgrg5"    "Asf1b"    
## [211] "Ube2t"     "Srxn1"     "Cpne9"     "Apob"      "Ercc6l"    "Rab7b"    
## [217] "Gpr84"     "Tspo"      "Slfn2"     "Ctsl"      "H2-D1"     "Cxcl16"   
## [223] "Cdc25b"    "Ska1"      "Tgfb2"     "Olig1"     "Luzp2"     "Anks1b"   
## [229] "Cldn7"     "Cd79a"     "H2-K1"     "Cdkn2c"    "Chchd10"   "Irf7"     
## [235] "Folr2"     "Cmpk2"     "Mns1"      "Dhfr"      "Smc1a"     "Ccne1"    
## [241] "Kcnk2"     "Kcnh7"     "Dkk2"      "Fam110b"   "Ptprf"     "Fbxo41"   
## [247] "Cpeb1"     "Sytl2"     "Gipc3"     "Gli1"      "Slc36a3os" "Gria1"    
## [253] "Arg2"      "Erc2"      "C1qtnf9"   "Mal2"      "Mov10l1"   "Pcdhb18"  
## [259] "Timp1"     "Cstb"      "Anp32b"    "Spdya"     "Ube2s"     "Fam20c"   
## [265] "Ramp1"     "Cenph"     "Tpr"       "Kifc1"     "H1f0"      "Arpp21"   
## [271] "Gpr65"     "Plcb1"     "Chl1"      "Cfl1"      "Chic1"     "Nucks1"   
## [277] "Pmp22"     "Foxm1"     "Stat1"     "Hmgb1"     "Perm1"     "Ahnak2"   
## [283] "Fxyd5"     "Rcan1"     "Fau"       "Klhl33"    "Depdc1a"   "Gatm"     
## [289] "Rbm44"     "Zbtb20"    "Sdc4"      "Atad5"     "Gnao1"     "Trib3"    
## [295] "Ccr7"      "Robo2"     "Kcnk4"     "Dek"       "Fcrla"     "Zfas1"    
## [301] "Cd69"      "Usp18"     "Igfbp4"    "Nhs"       "Tpt1"      "Il7r"     
## [307] "Car2"      "Efcab11"   "Fam111a"   "Mobp"      "Mpz"       "Hook1"    
## [313] "Ntrk2"     "Nefl"      "Spock1"    "Ulbp1"     "Gas5"      "Gpx3"     
## [319] "Mxd3"      "Mybl2"     "Sgk1"      "H1fx"      "Sh3bgr"    "Parp14"   
## [325] "Pla2g7"    "Il4i1"     "S100a4"    "Cpd"       "Rack1"     "Cd40"     
## [331] "Cnn3"      "Adam33"    "Lsp1"      "Gchfr"     "Srrm4"     "Syce1"    
## [337] "Clmn"      "Cd200"     "Lipo2"     "Tceal3"    "Tmem236"   "Bmp2"     
## [343] "Sytl1"     "Runx3"     "Actr3b"    "Gxylt2"    "Hrc"       "Hhip"     
## [349] "Car5a"     "Susd2"     "Grip1"     "Efr3b"     "Smoc1"     "Ubash3a"  
## [355] "Morc2b"    "Olfr113"   "Zp1"       "Nyx"       "Aox2"      "Sphkap"   
## [361] "Atp8b4"    "Hdc"       "Slc32a1"   "Slc2a10"   "Schip1"    "Sel1l3"   
## [367] "Srrm3"     "Neurod6"   "Emp1"      "Lmntd2"    "Spock3"    "Islr2"    
## [373] "Rbms3"     "Mterf2"    "Ccdc92b"   "Rflnb"     "Hecw1"     "Synpr"    
## [379] "Cdh6"      "Kcnq3"     "Foxred2"   "A4galt"    "Lta"       "Rit2"     
## [385] "Pcdhb4"    "Rhod"      "Ms4a14"    "Insl6"     "Map7d2"    "Fignl1"   
## [391] "Cenpk"     "Tcf19"     "Rad50"     "Ebf1"      "Lilr4b"    "Ccdc18"   
## [397] "Dnmt1"     "Plk4"      "Cenpi"     "Kif18a"    "Ccr5"      "Kcnip4"   
## [403] "Slc4a3"    "Pcdh17"    "Upk1b"     "Hmgn5"     "Chd4"      "Cenpw"    
## [409] "Plaur"     "Nsd2"      "Scd2"      "Eif5b"     "Ccn1"      "Edaradd"  
## [415] "Angptl4"   "Ankrd12"   "AU020206"  "Ankrd26"   "Gapdh"     "Fcgr4"    
## [421] "Dusp5"     "Cxcr4"     "Cep290"    "Brca2"     "Ncl"       "Nap1l1"   
## [427] "Zbp1"      "Ccdc88a"   "Slfn9"     "Acp5"      "S100a6"    "Lrr1"     
## [433] "Eif3a"     "Atrx"      "Cadm1"     "Psip1"     "Axl"       "Clec2i"   
## [439] "Ccl24"     "Nav3"      "Dbf4"      "Tarm1"     "Fmr1nb"    "Phf11b"   
## [445] "Tmsb10"    "Cd34"      "Mrvi1"     "Tert"      "Strip2"    "Tescl"    
## [451] "Ly6k"      "Spdl1"     "Smc3"      "Cdkn2d"    "Ska3"      "Nfkbia"   
## [457] "Oasl1"     "Rif1"      "Ccnd1"     "Lcorl"     "Mybl1"     "Nfasc"    
## [463] "Ehf"       "Gria2"     "She"       "Ankrd35"   "Tafa3"     "Calb1"    
## [469] "Cttnbp2"   "Clec4d"    "Pla2g4c"   "Upk1a"     "Napsa"     "Prr33"    
## [475] "Syne1"     "Scml4"     "Nrxn3"     "Crhbp"     "Trem1"     "Kcnip2"   
## [481] "Rab33a"    "Usp26"     "Fndc3c1"   "Alas2"     "Wnk3"      "Ciart"    
## [487] "Dnah2"     "Bicd1"     "Rnf213"    "Wdr41"     "Bod1l"     "Hpse"     
## [493] "Slc12a2"   "Creb5"     "Syt1"      "Cxcr2"     "Akap9"     "Oas3"     
## [499] "Eef1a1"    "Syt6"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB1) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB1)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

(GEX.seur@reductions$pca@feature.loadings %>% as.data.frame)[,1:8] %>% arrange(desc(PC_1) ) %>% head(40)
##                PC_1        PC_2         PC_3          PC_4         PC_5
## Pclaf    0.16540274  0.05657295  0.024587087 -0.0047782942  0.029624834
## Knl1     0.15169426  0.05366531  0.022129491 -0.0044743584 -0.026918672
## Prc1     0.14990740  0.06229109  0.026039386 -0.0027756306 -0.061906772
## Pbk      0.14684374  0.06182515  0.025621135 -0.0050188086 -0.046319711
## Spc24    0.14103116  0.04737086  0.019494964 -0.0078168867 -0.023512763
## Kif15    0.13683854  0.04994190  0.020337218 -0.0091786492 -0.034496461
## Lockd    0.13573152  0.04923311  0.023987664 -0.0134920458 -0.056327922
## Neil3    0.13408171  0.05186888  0.019243116 -0.0048612443 -0.019630255
## Esco2    0.13044049  0.05477780  0.017262897 -0.0044773869 -0.017175024
## Tk1      0.12489923  0.03776259  0.015210532 -0.0003529377  0.049783617
## Ccna2    0.12468177  0.05125062  0.018362159 -0.0048465194 -0.055674307
## Racgap1  0.12226672  0.04047054  0.016745660 -0.0167586291 -0.065118349
## Bub1b    0.12051462  0.03837992  0.021467036 -0.0052194555 -0.051195846
## Stmn1    0.11813498 -0.01744699  0.035962808 -0.0325233867  0.058847923
## Sgo2a    0.11773926  0.04965944  0.017148513 -0.0013178066 -0.041339644
## Spc25    0.11678167  0.05077152  0.014121859 -0.0058758429 -0.024119047
## Cit      0.11458957  0.04260997  0.016493600 -0.0035387456 -0.016240322
## Shcbp1   0.11218877  0.04374804  0.011458872 -0.0076020233 -0.020731778
## Mis18bp1 0.11149079  0.04038391  0.010829888 -0.0117453396 -0.042633311
## Aspm     0.11141058  0.04457378  0.015892148 -0.0071702577 -0.067227791
## Ccnf     0.11082441  0.05116468  0.011313525 -0.0063488964 -0.037471119
## Ncapg    0.10878178  0.03956813  0.017313076 -0.0032418726 -0.010115857
## Knstrn   0.10837572  0.02782491  0.020096949 -0.0188747088 -0.080260573
## Ccnb1    0.10471825  0.04125045  0.016408861 -0.0153529771 -0.083328581
## Plk1     0.10450738  0.04140679  0.015091464 -0.0093511950 -0.070307489
## Cenpm    0.10413109  0.02933237  0.012928158 -0.0111273384 -0.004132051
## Sgo1     0.10276411  0.04226031  0.013569585  0.0007850465 -0.030924638
## H2afx    0.10155065  0.03134362  0.011126905 -0.0131709974 -0.046760872
## Kif4     0.10057877  0.04143334  0.013660114 -0.0043585905 -0.041523525
## Trim59   0.09895733  0.03416718  0.019616657 -0.0063238235 -0.027856938
## Asf1b    0.09749316  0.03044386  0.013691767  0.0006040775  0.019697077
## Kif22    0.09675161  0.03553771  0.016714070 -0.0051175910 -0.049114359
## Cdkn3    0.09569471  0.03089181  0.018594564 -0.0178625122 -0.086120589
## Fbxo5    0.09526259  0.04630018  0.014520231 -0.0009311485 -0.010531163
## Smc2     0.09459293  0.02570141 -0.003617909 -0.0020587955  0.009062383
## Diaph3   0.09417816  0.02796283  0.009431229 -0.0017247823 -0.010746563
## Ncapg2   0.09217717  0.03256377  0.011178895 -0.0091264883  0.063484016
## Melk     0.08998016  0.03613563  0.010348335 -0.0054412275 -0.033910839
## Brip1    0.08590003  0.02497152  0.010348445  0.0025422057  0.037585929
## Brca1    0.08588070  0.02681117  0.004690474  0.0003010869  0.057995394
##                  PC_6         PC_7          PC_8
## Pclaf     0.005772053 -0.142440053  0.0072858994
## Knl1     -0.018039521 -0.003543918 -0.0117206237
## Prc1     -0.011932237  0.051956971 -0.0381578204
## Pbk      -0.007998518  0.029192502 -0.0368672050
## Spc24     0.005451505 -0.011311740 -0.0031586873
## Kif15    -0.016821174 -0.002611589 -0.0125360312
## Lockd     0.013707312  0.053255507 -0.0083935676
## Neil3    -0.013693766 -0.021143993 -0.0234609231
## Esco2    -0.010977889 -0.045789010 -0.0294958055
## Tk1       0.006567166 -0.160361314 -0.0042391035
## Ccna2    -0.006691496  0.059931252 -0.0178678109
## Racgap1   0.006699092  0.088111353  0.0005662194
## Bub1b     0.004221606  0.054686374 -0.0049659564
## Stmn1     0.062329723 -0.028187516  0.0492008990
## Sgo2a    -0.020885653  0.023216333 -0.0251020884
## Spc25    -0.009902977  0.010195795 -0.0283911059
## Cit      -0.009028875 -0.006345204 -0.0153491226
## Shcbp1   -0.016279055 -0.018311357 -0.0102810019
## Mis18bp1 -0.013591799  0.045158221 -0.0343127090
## Aspm     -0.004681193  0.099838841 -0.0202839648
## Ccnf     -0.011843897  0.022877387 -0.0246130480
## Ncapg    -0.017043593  0.010843724 -0.0286745641
## Knstrn    0.013016122  0.105204849  0.0425149433
## Ccnb1     0.006566855  0.124296507 -0.0197403242
## Plk1     -0.008603905  0.094135286 -0.0222321816
## Cenpm     0.007024911 -0.048304780 -0.0027308647
## Sgo1     -0.011153102  0.017777175 -0.0213937095
## H2afx    -0.007986646  0.069439674 -0.0073401892
## Kif4     -0.014663909  0.046484800 -0.0313199054
## Trim59   -0.002434715  0.003856424 -0.0132408047
## Asf1b    -0.003882445 -0.085175148 -0.0255799314
## Kif22    -0.005873223  0.050130109 -0.0197296391
## Cdkn3     0.016245281  0.119770181  0.0087380110
## Fbxo5    -0.016893089 -0.004951023 -0.0327750767
## Smc2      0.003642358 -0.029214591 -0.0101757428
## Diaph3    0.001884272  0.004563486 -0.0051303912
## Ncapg2   -0.016082308 -0.146879327  0.0004911132
## Melk     -0.009033634  0.017604120 -0.0176098405
## Brip1    -0.008300354 -0.123421527  0.0021912547
## Brca1    -0.001368171 -0.135848714  0.0008942544
(GEX.seur@reductions$pca@feature.loadings %>% as.data.frame)[,1:8] %>% arrange(PC_1 ) %>% head(40)
##                   PC_1        PC_2          PC_3          PC_4          PC_5
## Ctsd      -0.063912472 0.147360522 -0.0025436028 -0.2182541211  0.0023893591
## Pmp22     -0.034916245 0.123266604 -0.0076632301 -0.1218258551  0.0538667745
## Slc1a3    -0.031323621 0.088856951 -0.0118854198 -0.0258667401  0.0114260200
## Fam102b   -0.031075351 0.101484478  0.0058029370  0.0195541273  0.0627599823
## Lrba      -0.026774662 0.097491267  0.0222597119 -0.0019647790  0.0485776238
## Thrsp     -0.024708927 0.074763757  0.0179292117 -0.0369968053 -0.0056766399
## Ddit4     -0.022721332 0.056000515  0.0260588490  0.0039743317 -0.0019052693
## Sox4      -0.020363833 0.040746956  0.0780224576 -0.0119036584  0.0848529342
## Cd9       -0.019627152 0.068486145  0.0415058212 -0.2096625489  0.0438613253
## Pmepa1    -0.019571877 0.060473152  0.0582456300  0.0370803840  0.0533305177
## Csmd3     -0.018600962 0.070443581  0.0012599949  0.0153528536  0.0460835559
## Upk1b     -0.018309976 0.063849608  0.0113627531 -0.0103432188  0.0143677392
## Hpgd      -0.018294391 0.010138535  0.1126882693  0.0170262239  0.0371440491
## Abca1     -0.017839040 0.004474445  0.0119610249 -0.0547947825  0.0354847246
## Il7r      -0.017484454 0.071609360  0.0285959943  0.0259572444 -0.0002001133
## Tent5c    -0.017068756 0.055201880  0.0009433579 -0.0734284985  0.0462454468
## Ccl9      -0.016124829 0.034674603  0.0239026755 -0.0657819261  0.0868959351
## Inpp4b    -0.015019086 0.036390096  0.0150580897  0.0018490036  0.0569597715
## Zbtb20    -0.014393861 0.057937531  0.0242167392  0.0010968007  0.0279927571
## Tnfrsf17  -0.014008869 0.032915882  0.0474914121 -0.0054116645  0.0370310007
## Sgk1      -0.013164784 0.034768736  0.0605502964 -0.0245929327  0.0672668130
## Syngr1    -0.012693039 0.013819957  0.0159592753 -0.1070909739  0.0383179824
## Arid5b    -0.011669890 0.030660876  0.0213018025 -0.0356894728  0.0075075503
## Itpr2     -0.011511183 0.034070930 -0.0048627259  0.0129642133  0.0408851064
## Tent5a    -0.011018569 0.027858703 -0.0186897997  0.0298070083 -0.0091893557
## Plxna4os1 -0.010754719 0.020820242 -0.0031501273 -0.0147888430 -0.0021270310
## Slc12a2   -0.010728271 0.012634482  0.0559547716 -0.0532629398  0.1120011535
## Ccl6      -0.010218034 0.016461588  0.0419574314 -0.0801593408  0.0832876495
## Pitpnc1   -0.010114450 0.018564303  0.0282542064 -0.0057180080  0.0410084208
## Klrd1     -0.009902909 0.034765400  0.0234995627  0.0002361291  0.0131579926
## Ttc28     -0.009843043 0.039959120  0.0313951168 -0.0162315648  0.0487456075
## Cask      -0.009796421 0.038084675  0.0131455605 -0.0079796244  0.0296705647
## Tjp1      -0.009794462 0.047707666  0.0294390202  0.0426854020  0.0728141845
## Rcan1     -0.008028374 0.021335337 -0.0114718554 -0.0101198987  0.0285163762
## Klk8      -0.007979763 0.030769827 -0.0354585478 -0.0117269709 -0.0021685764
## Naalad2   -0.007882402 0.024045785 -0.0011711232 -0.0062992943  0.0060646293
## Arid4a    -0.007628311 0.020782543  0.0140280819  0.0225395999  0.0272426561
## Igf1r     -0.007457956 0.021225330  0.0184950325  0.0047834472  0.0468127184
## Ankrd12   -0.007332389 0.006161078  0.0164408760 -0.0465269719  0.0526168252
## Arap2     -0.006388564 0.027500172  0.0051068518 -0.0166260327  0.0170536577
##                   PC_6         PC_7         PC_8
## Ctsd      -0.002515829 -0.040006367  0.012275731
## Pmp22      0.035287689  0.020374622  0.072578113
## Slc1a3    -0.012397049 -0.004716050 -0.018723696
## Fam102b    0.023776202  0.025656055  0.063880173
## Lrba       0.060090931  0.021158227  0.073019629
## Thrsp      0.029275933 -0.021127153  0.063649395
## Ddit4     -0.027761972 -0.008018193  0.094272446
## Sox4       0.033778654  0.047517083 -0.031223476
## Cd9        0.042822348  0.018185392 -0.026972535
## Pmepa1     0.039732271  0.019700195  0.042565177
## Csmd3      0.021392295  0.025292307  0.024664169
## Upk1b      0.028222909 -0.004335107  0.055495647
## Hpgd      -0.006940112  0.016341416  0.020886206
## Abca1     -0.145422506  0.016975089 -0.046165048
## Il7r       0.020846226 -0.008271410  0.109294553
## Tent5c    -0.014817061  0.026544945  0.016766294
## Ccl9       0.032543880  0.017573424 -0.109075907
## Inpp4b    -0.037389528  0.026634265  0.032042181
## Zbtb20    -0.048354945 -0.009247586  0.049617204
## Tnfrsf17   0.033183197  0.017340504 -0.015645018
## Sgk1       0.021108367  0.041893322 -0.012323059
## Syngr1     0.022932739 -0.003933401  0.002641273
## Arid5b    -0.056661422  0.001093813  0.038275454
## Itpr2     -0.032328430  0.027646666  0.045753005
## Tent5a    -0.053471257  0.017667714  0.056875265
## Plxna4os1  0.010799601 -0.005907374  0.030530444
## Slc12a2   -0.024803392  0.020279699 -0.089885845
## Ccl6       0.053287726  0.025813923 -0.124188992
## Pitpnc1   -0.035382841  0.002511988 -0.001819823
## Klrd1      0.035360708  0.011108965  0.051297937
## Ttc28     -0.030374874  0.022558104  0.082154013
## Cask      -0.007063792  0.012251567  0.024786210
## Tjp1      -0.024520035  0.062446028  0.102262089
## Rcan1      0.012517807  0.015378793 -0.033568614
## Klk8       0.037186930 -0.008351035  0.031504227
## Naalad2   -0.009900958 -0.005377288  0.034675104
## Arid4a    -0.049062971  0.017930373  0.035261037
## Igf1r     -0.034288213 -0.005386432 -0.011200335
## Ankrd12   -0.065545182  0.039661002 -0.038891788
## Arap2     -0.003355397  0.004788855  0.007145943
decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:12
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 12877
## Number of edges: 351905
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7160
## Number of communities: 13
## Elapsed time: 2 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 50, seed.use = 253)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 20:55:36 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:55:36 Read 12877 rows and found 12 numeric columns
## 20:55:36 Using Annoy for neighbor search, n_neighbors = 50
## 20:55:36 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:55:38 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\Rtmp4MX2u5\file5c603e127319
## 20:55:38 Searching Annoy index using 1 thread, search_k = 5000
## 20:55:45 Annoy recall = 100%
## 20:55:46 Commencing smooth kNN distance calibration using 1 thread
## 20:55:48 Initializing from normalized Laplacian + noise
## 20:55:48 Commencing optimization for 200 epochs, with 890426 positive edges
## 20:56:05 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 3, cols = color.FB1)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.info", cols = color.FB1)

check some markers

markers.mig <- c("Ptprc",#"Cd3d","Cd3e","Cd19",
                 "Cd74","Lyz2","Ccl4",
                 "Aif1","P2ry12","C1qa","Spp1",
                 "Top2a","Pcna","Mki67","Mcm6",
                 "Cx3cr1","Il4ra","Il13ra1","Fabp5",
                 "Hmox1","Ms4a7","Pf4","Tubb3",
                 "tdTomato-head","tdTomato-mid","tdTomato-tail","CreERT2"
                 )
FeaturePlot(GEX.seur, 
            features = markers.mig,
            ncol = 4)
## Warning in FeaturePlot(GEX.seur, features = markers.mig, ncol = 4): All cells
## have the same value (0) of CreERT2.

DotPlot(GEX.seur, features = markers.mig, group.by = "FB.info",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)

VlnPlot(GEX.seur, features = c("tdTomato-head","tdTomato-mid","tdTomato-tail","CreERT2"),
        group.by = "FB.info", ncol = 2)
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of CreERT2.

DotPlot(GEX.seur, features = markers.mig, group.by = "seurat_clusters",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)

sort

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(3,4,6,5,7,11,12,10,
                                            9,
                                            1,0,2,8))
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$sort_clusters)[c(3:5,10:12),],
                   main = "Cell Count",
                   gaps_row = c(3),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$sort_clusters)[c(3:5,10:12),]),
                   main = "Cell Ratio",
                   gaps_row = c(3),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

markers

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "sort_clusters"

#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, 
#                                  min.pct = 0.05,
#                                  test.use = "MAST",
#                                  logfc.threshold = 0.25)
GEX.markers.pre <- read.table("sc10x_LYN.marker.subset1_sort1007.csv", header = TRUE, sep = ",")
GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
## # A tibble: 104 x 7
## # Groups:   cluster [13]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene         
##        <dbl>      <dbl> <dbl> <dbl>     <dbl>   <int> <chr>        
##  1 3.52e-212      0.500 1     1     6.30e-208       3 Fau          
##  2 1.44e-205      0.487 1     0.999 2.57e-201       3 Tpt1         
##  3 1.91e-193      0.484 1     0.999 3.41e-189       3 Sparc        
##  4 6.42e-132      0.475 0.999 0.995 1.15e-127       3 Ctsl         
##  5 2.07e-131      0.646 0.951 0.817 3.70e-127       3 Crybb1       
##  6 4.37e- 86      0.552 0.724 0.473 7.82e- 82       3 2410006H16Rik
##  7 1.13e- 80      0.491 0.83  0.618 2.02e- 76       3 Gas5         
##  8 7.43e- 34      1.01  0.162 0.074 1.33e- 29       3 Xist         
##  9 1.97e-320      0.774 0.999 0.913 3.53e-316       4 Cfl1         
## 10 2.15e-215      0.877 0.628 0.225 3.84e-211       4 Igfbp4       
## # ... with 94 more rows
GEX.markers.sort <- GEX.markers.pre
GEX.markers.sort$cluster <- factor(as.character(GEX.markers.sort$cluster),
                          levels = levels(GEX.seur$sort_clusters))


markers.pre_t48 <- (GEX.markers.sort %>% group_by(cluster) %>% 
                  filter(pct.1>0.1) %>%
                   top_n(n = 48, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t60 <- (GEX.markers.sort %>% group_by(cluster) %>% 
                  filter(pct.1>0.1 & gene %in% grep("Rps|Rpl",GEX.markers.sort$gene,invert = T,value = T)) %>%
                   top_n(n = 60, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t120 <- (GEX.markers.sort %>% group_by(cluster) %>% 
                  filter(pct.1>0.1 & gene %in% grep("Rps|Rpl",GEX.markers.sort$gene,invert = T,value = T)) %>%
                   top_n(n = 120, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[1:64]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[65:128]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[129:192]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[193:256]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[257:320]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[321:384]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[385:448]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[449:495]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

GEX.seur$cnt <- factor(as.character(GEX.seur$FB.info),
                       levels = levels(GEX.seur$FB.info)[c(12:10,5:3)])
color.cnt <- rev(color.FB1)

signature score

#### 10x data, calculate signature score       
#
## The code below is from Adam Hamber
## 2D scoring by Itay
get_controls <- function(counts, gene.list, verbose=F, control.genes.per.gene=10)
{
    # Itay: "Such scores are inevitably correlated with cell complexity so to avoid 
    # that I subtract a "control" score which is generated by averaging over a control 
    # gene set. Control gene sets are chosen to contain 100 times more genes than the 
    # real gene set (analogous to averaging over 100 control sets of similar size) and 
    # to have the same distribution of population/bulk - based expression levels as the 
    # real gene set, such that they are expected to have the same number of "zeros" and 
    # to eliminate the correlation with complexity."
    # ---------------------------------------------------------------------------------
    # Going to find control points by finding the closest genes in terms of expression level and % of the time we observe it
    if(verbose){cat(sprintf("Finding %s background genes based on similarity to given gene set [%s genes] \n", 
                            control.genes.per.gene*length(gene.list), length(gene.list)))}
    cat("Summarizing data \n")
    summary = data.frame(gene=row.names(counts), mean.expr = Matrix::rowMeans(counts), fract.zero = Matrix::rowMeans(counts==0), stringsAsFactors = F)
    #summary = data.frame(gene=row.names(counts), mean.expr = apply(counts,1,mean), fract.zero = apply(counts==0,1,mean), stringsAsFactors = F)
    summary$mean.expr.s = scale(summary$mean.expr)
    summary$fract.zero.s = scale(summary$fract.zero)
    actual.genes = summary[summary$gene %in% gene.list,]
    background.genes = summary[!summary$gene %in% gene.list,]
    
    #find the 10 closest genes to each cell cycle marker gene and add them to the lists of control genes
    get_closest_genes <- function(i)
    {
        background.genes$dist = sqrt((background.genes$mean.expr.s - actual.genes$mean.expr.s[i])^2 + 
                                         (background.genes$fract.zero.s - actual.genes$fract.zero.s[i])^2)
        ordered = background.genes$gene[order(background.genes$dist)]
        ordered = ordered[!ordered %in% controls] # don't take genes that already appear in the list 
        closest = head(ordered, n=control.genes.per.gene)
        return(closest)
    }
    controls = c();
    
    for (i in 1:length(gene.list)){
        #info(sprintf("Finding %s control genes for %s", control.genes.per.gene, gene.list[i]))
        closest = get_closest_genes(i)
        #info(sprintf("Found %s: ", length(closest)))
        controls = unique(c(controls, closest))
    }
    
    if(verbose){cat(sprintf("Control gene selection complete. %s genes found. \n", length(controls)))}
    #print(controls)
    return(controls)
}

## Define calculate function
calculate_signature_score <- function(count_matrix, gene_list){
    control_gene <- get_controls(counts = count_matrix,
                                 gene.list = gene_list)
    signature_score <- colMeans(count_matrix[gene_list, ], na.rm = TRUE) - 
        colMeans(count_matrix[control_gene, ], na.rm = TRUE)
    return(signature_score)
}

add_geneset_score <- function(obj, geneset, setname){
  score <- calculate_signature_score(as.data.frame(obj@assays[['RNA']]@data),
                                     geneset)
  obj <- AddMetaData(obj,
                     score,
                     setname)
  return(obj)
}
## proc_DEG()
#       to process edgeR result for DEGs' comparison
#              mat_cut is a matrix after advanced filtering, 'like TPM > n in at least one condition'
# 

proc_DEG <- function(deg, p.cut=0.05, FC.cut = 2, padj=TRUE, abs=TRUE, mat_cut=NULL, gene_cut=NULL){
    rownames(deg) <- deg$gene
    
    if(padj==TRUE){
        deg <- deg %>% filter(padj < p.cut)
    }else{
        deg <- deg %>% filter(pvalue < p.cut)
    }
    
    if(abs==TRUE){
        deg <- deg %>% filter(abs(FC) > FC.cut)
    }else if(FC.cut >0){
        deg <- deg %>% filter(FC > FC.cut)
    }else{
        deg <- deg %>% filter(FC < FC.cut)
    }
    
    if(!is.null(mat_cut)){
        deg <- deg[rownames(deg) %in% rownames(mat_cut),]
    }
    if(!is.null(gene_cut)){
        deg <- deg[rownames(deg) %in% gene_cut,]
    }
    return(deg)
}

DAM sig

DAM signature expression as feature plot (try top 50, top100, top250, top500)

DAM.sig <- read.csv("../../../20230811_10x_LYN/analysis_plus_exogene/figures1002/new/ranking_of_DAM_indicator_genes.csv")
DAM.sig[1:8,]
##   锘縍anking     Gene Frequence Total.score
## 1          1     Spp1        11    35.98221
## 2          2     Apoe        11    31.44704
## 3          3   Ifitm3         9    20.82901
## 4          4 Ifi27l2a         9    20.44047
## 5          5     Lyz2        11    20.22463
## 6          6     Cst7         9    19.48372
## 7          7     Cd74         7    18.24720
## 8          8   Lgals3         7    18.24180
DAM.list <- list(top50=DAM.sig$Gene[1:50],
                 top100=DAM.sig$Gene[1:100],
                 top250=DAM.sig$Gene[1:250],
                 top500=DAM.sig$Gene[1:500])
DAM.list
## $top50
##  [1] "Spp1"     "Apoe"     "Ifitm3"   "Ifi27l2a" "Lyz2"     "Cst7"    
##  [7] "Cd74"     "Lgals3"   "Lpl"      "Cd52"     "Ccl5"     "Ccl12"   
## [13] "Lgals3bp" "Cd63"     "H2-Ab1"   "Fn1"      "H2-Aa"    "Fxyd5"   
## [19] "Ccl4"     "Cybb"     "Bst2"     "Cstb"     "H2-Eb1"   "Fth1"    
## [25] "Vim"      "Tspo"     "Ctsb"     "Ccl3"     "Axl"      "Anxa5"   
## [31] "Isg15"    "Lgals1"   "Ccl2"     "Ifi204"   "Igf1"     "Ch25h"   
## [37] "Mif"      "Cxcl10"   "Plin2"    "Fabp5"    "Il1b"     "Crip1"   
## [43] "Slfn2"    "B2m"      "Irf7"     "Cd72"     "Capg"     "Ms4a6c"  
## [49] "Ifit3"    "Apoc1"   
## 
## $top100
##   [1] "Spp1"     "Apoe"     "Ifitm3"   "Ifi27l2a" "Lyz2"     "Cst7"    
##   [7] "Cd74"     "Lgals3"   "Lpl"      "Cd52"     "Ccl5"     "Ccl12"   
##  [13] "Lgals3bp" "Cd63"     "H2-Ab1"   "Fn1"      "H2-Aa"    "Fxyd5"   
##  [19] "Ccl4"     "Cybb"     "Bst2"     "Cstb"     "H2-Eb1"   "Fth1"    
##  [25] "Vim"      "Tspo"     "Ctsb"     "Ccl3"     "Axl"      "Anxa5"   
##  [31] "Isg15"    "Lgals1"   "Ccl2"     "Ifi204"   "Igf1"     "Ch25h"   
##  [37] "Mif"      "Cxcl10"   "Plin2"    "Fabp5"    "Il1b"     "Crip1"   
##  [43] "Slfn2"    "B2m"      "Irf7"     "Cd72"     "Capg"     "Ms4a6c"  
##  [49] "Ifit3"    "Apoc1"    "Fcgr4"    "Il1rn"    "Wfdc17"   "Cxcl2"   
##  [55] "Cxcl16"   "Prdx1"    "Rtp4"     "H2-D1"    "Pkm"      "Stat1"   
##  [61] "Anxa2"    "Gpnmb"    "Zbp1"     "Ftl1"     "Ldha"     "Npc2"    
##  [67] "Ly6a"     "Oasl2"    "Gapdh"    "Prdx5"    "Gbp2"     "Grn"     
##  [73] "Ifi207"   "Ifitm2"   "Tlr2"     "Txn1"     "Phf11b"   "Ctsz"    
##  [79] "Pianp"    "Cd36"     "Itgax"    "Fgl2"     "Ccl6"     "Iigp1"   
##  [85] "Ccl7"     "H2-K1"    "Pld3"     "Tpi1"     "Akr1a1"   "Usp18"   
##  [91] "Rab7b"    "Tmsb10"   "Cd44"     "Aldoa"    "Hcar2"    "Acod1"   
##  [97] "Cd300lb"  "Ctsc"     "Gpr65"    "H2-Q7"   
## 
## $top250
##   [1] "Spp1"     "Apoe"     "Ifitm3"   "Ifi27l2a" "Lyz2"     "Cst7"    
##   [7] "Cd74"     "Lgals3"   "Lpl"      "Cd52"     "Ccl5"     "Ccl12"   
##  [13] "Lgals3bp" "Cd63"     "H2-Ab1"   "Fn1"      "H2-Aa"    "Fxyd5"   
##  [19] "Ccl4"     "Cybb"     "Bst2"     "Cstb"     "H2-Eb1"   "Fth1"    
##  [25] "Vim"      "Tspo"     "Ctsb"     "Ccl3"     "Axl"      "Anxa5"   
##  [31] "Isg15"    "Lgals1"   "Ccl2"     "Ifi204"   "Igf1"     "Ch25h"   
##  [37] "Mif"      "Cxcl10"   "Plin2"    "Fabp5"    "Il1b"     "Crip1"   
##  [43] "Slfn2"    "B2m"      "Irf7"     "Cd72"     "Capg"     "Ms4a6c"  
##  [49] "Ifit3"    "Apoc1"    "Fcgr4"    "Il1rn"    "Wfdc17"   "Cxcl2"   
##  [55] "Cxcl16"   "Prdx1"    "Rtp4"     "H2-D1"    "Pkm"      "Stat1"   
##  [61] "Anxa2"    "Gpnmb"    "Zbp1"     "Ftl1"     "Ldha"     "Npc2"    
##  [67] "Ly6a"     "Oasl2"    "Gapdh"    "Prdx5"    "Gbp2"     "Grn"     
##  [73] "Ifi207"   "Ifitm2"   "Tlr2"     "Txn1"     "Phf11b"   "Ctsz"    
##  [79] "Pianp"    "Cd36"     "Itgax"    "Fgl2"     "Ccl6"     "Iigp1"   
##  [85] "Ccl7"     "H2-K1"    "Pld3"     "Tpi1"     "Akr1a1"   "Usp18"   
##  [91] "Rab7b"    "Tmsb10"   "Cd44"     "Aldoa"    "Hcar2"    "Acod1"   
##  [97] "Cd300lb"  "Ctsc"     "Gpr65"    "H2-Q7"    "Cdkn1a"   "Psat1"   
## [103] "Trim30a"  "Cxcl14"   "Srgn"     "Mmp12"    "Bcl2a1b"  "Tap1"    
## [109] "AB124611" "Xaf1"     "Ly6e"     "Psme1"    "Ctsl"     "Sp100"   
## [115] "Cxcr4"    "Psmb8"    "AA467197" "Pgk1"     "Emp3"     "Csf1"    
## [121] "Mcemp1"   "Gusb"     "Pim1"     "Ctse"     "Cox4i1"   "Il12b"   
## [127] "Msrb1"    "Npm1"     "Alcam"    "Psme2"    "Apoc2"    "Bhlhe40" 
## [133] "Stmn1"    "Gm4951"   "Pla2g7"   "Plaur"    "Tor3a"    "Hspe1"   
## [139] "Tpt1"     "Ndufa1"   "Flt1"     "Tgfbi"    "Cox6c"    "Irgm1"   
## [145] "Ifit2"    "Uba52"    "Igf2r"    "Bola2"    "Ank"      "Tyrobp"  
## [151] "Tpm4"     "Ass1"     "Ms4a4c"   "Ifit1"    "Ybx1"     "Sod2"    
## [157] "Cox8a"    "Fam20c"   "Oas1a"    "Arg1"     "Ms4a7"    "Smpdl3a" 
## [163] "Sh3pxd2b" "Fau"      "Gnas"     "Phf11d"   "Ehd1"     "Saa3"    
## [169] "Cox5a"    "Atox1"    "Id3"      "Lrpap1"   "Olr1"     "Sh3bgrl3"
## [175] "Sash1"    "Hint1"    "Il2rg"    "Ctsd"     "Postn"    "H2-T23"  
## [181] "Ucp2"     "Sdc3"     "Uqcrq"    "Cox6a1"   "Cd300lf"  "Syngr1"  
## [187] "Timp2"    "Atp5e"    "Id2"      "S100a6"   "Cd14"     "Tubb6"   
## [193] "Anp32b"   "Fcgr2b"   "Cd83"     "Psmb9"    "Bcl2a1a"  "Aprt"    
## [199] "Mfsd12"   "Psap"     "Cox6b1"   "Lilr4b"   "Plac8"    "Glrx"    
## [205] "Scimp"    "Rilpl2"   "Psmb6"    "Gm11808"  "Chmp4b"   "Actr3b"  
## [211] "Ly86"     "Fundc2"   "Ifi211"   "Hif1a"    "Serf2"    "Dram1"   
## [217] "Parp14"   "Ptgs2"    "Cxcl9"    "Myo5a"    "Gabarap"  "Cyp4f18" 
## [223] "Shisa5"   "Lilrb4a"  "Cpd"      "Iqgap1"   "Slfn5"    "Tnfaip2" 
## [229] "Nme1"     "Cotl1"    "Tagln2"   "Mt1"      "Mpeg1"    "C3"      
## [235] "Ube2l6"   "Clic4"    "Naaa"     "Gas7"     "Sdcbp"    "Nampt"   
## [241] "Ell2"     "Samhd1"   "Rtcb"     "Eef1g"    "Hmgb2"    "Gng5"    
## [247] "Nfil3"    "Adora1"   "Tpd52"    "Ptger4"  
## 
## $top500
##   [1] "Spp1"          "Apoe"          "Ifitm3"        "Ifi27l2a"     
##   [5] "Lyz2"          "Cst7"          "Cd74"          "Lgals3"       
##   [9] "Lpl"           "Cd52"          "Ccl5"          "Ccl12"        
##  [13] "Lgals3bp"      "Cd63"          "H2-Ab1"        "Fn1"          
##  [17] "H2-Aa"         "Fxyd5"         "Ccl4"          "Cybb"         
##  [21] "Bst2"          "Cstb"          "H2-Eb1"        "Fth1"         
##  [25] "Vim"           "Tspo"          "Ctsb"          "Ccl3"         
##  [29] "Axl"           "Anxa5"         "Isg15"         "Lgals1"       
##  [33] "Ccl2"          "Ifi204"        "Igf1"          "Ch25h"        
##  [37] "Mif"           "Cxcl10"        "Plin2"         "Fabp5"        
##  [41] "Il1b"          "Crip1"         "Slfn2"         "B2m"          
##  [45] "Irf7"          "Cd72"          "Capg"          "Ms4a6c"       
##  [49] "Ifit3"         "Apoc1"         "Fcgr4"         "Il1rn"        
##  [53] "Wfdc17"        "Cxcl2"         "Cxcl16"        "Prdx1"        
##  [57] "Rtp4"          "H2-D1"         "Pkm"           "Stat1"        
##  [61] "Anxa2"         "Gpnmb"         "Zbp1"          "Ftl1"         
##  [65] "Ldha"          "Npc2"          "Ly6a"          "Oasl2"        
##  [69] "Gapdh"         "Prdx5"         "Gbp2"          "Grn"          
##  [73] "Ifi207"        "Ifitm2"        "Tlr2"          "Txn1"         
##  [77] "Phf11b"        "Ctsz"          "Pianp"         "Cd36"         
##  [81] "Itgax"         "Fgl2"          "Ccl6"          "Iigp1"        
##  [85] "Ccl7"          "H2-K1"         "Pld3"          "Tpi1"         
##  [89] "Akr1a1"        "Usp18"         "Rab7b"         "Tmsb10"       
##  [93] "Cd44"          "Aldoa"         "Hcar2"         "Acod1"        
##  [97] "Cd300lb"       "Ctsc"          "Gpr65"         "H2-Q7"        
## [101] "Cdkn1a"        "Psat1"         "Trim30a"       "Cxcl14"       
## [105] "Srgn"          "Mmp12"         "Bcl2a1b"       "Tap1"         
## [109] "AB124611"      "Xaf1"          "Ly6e"          "Psme1"        
## [113] "Ctsl"          "Sp100"         "Cxcr4"         "Psmb8"        
## [117] "AA467197"      "Pgk1"          "Emp3"          "Csf1"         
## [121] "Mcemp1"        "Gusb"          "Pim1"          "Ctse"         
## [125] "Cox4i1"        "Il12b"         "Msrb1"         "Npm1"         
## [129] "Alcam"         "Psme2"         "Apoc2"         "Bhlhe40"      
## [133] "Stmn1"         "Gm4951"        "Pla2g7"        "Plaur"        
## [137] "Tor3a"         "Hspe1"         "Tpt1"          "Ndufa1"       
## [141] "Flt1"          "Tgfbi"         "Cox6c"         "Irgm1"        
## [145] "Ifit2"         "Uba52"         "Igf2r"         "Bola2"        
## [149] "Ank"           "Tyrobp"        "Tpm4"          "Ass1"         
## [153] "Ms4a4c"        "Ifit1"         "Ybx1"          "Sod2"         
## [157] "Cox8a"         "Fam20c"        "Oas1a"         "Arg1"         
## [161] "Ms4a7"         "Smpdl3a"       "Sh3pxd2b"      "Fau"          
## [165] "Gnas"          "Phf11d"        "Ehd1"          "Saa3"         
## [169] "Cox5a"         "Atox1"         "Id3"           "Lrpap1"       
## [173] "Olr1"          "Sh3bgrl3"      "Sash1"         "Hint1"        
## [177] "Il2rg"         "Ctsd"          "Postn"         "H2-T23"       
## [181] "Ucp2"          "Sdc3"          "Uqcrq"         "Cox6a1"       
## [185] "Cd300lf"       "Syngr1"        "Timp2"         "Atp5e"        
## [189] "Id2"           "S100a6"        "Cd14"          "Tubb6"        
## [193] "Anp32b"        "Fcgr2b"        "Cd83"          "Psmb9"        
## [197] "Bcl2a1a"       "Aprt"          "Mfsd12"        "Psap"         
## [201] "Cox6b1"        "Lilr4b"        "Plac8"         "Glrx"         
## [205] "Scimp"         "Rilpl2"        "Psmb6"         "Gm11808"      
## [209] "Chmp4b"        "Actr3b"        "Ly86"          "Fundc2"       
## [213] "Ifi211"        "Hif1a"         "Serf2"         "Dram1"        
## [217] "Parp14"        "Ptgs2"         "Cxcl9"         "Myo5a"        
## [221] "Gabarap"       "Cyp4f18"       "Shisa5"        "Lilrb4a"      
## [225] "Cpd"           "Iqgap1"        "Slfn5"         "Tnfaip2"      
## [229] "Nme1"          "Cotl1"         "Tagln2"        "Mt1"          
## [233] "Mpeg1"         "C3"            "Ube2l6"        "Clic4"        
## [237] "Naaa"          "Gas7"          "Sdcbp"         "Nampt"        
## [241] "Ell2"          "Samhd1"        "Rtcb"          "Eef1g"        
## [245] "Hmgb2"         "Gng5"          "Nfil3"         "Adora1"       
## [249] "Tpd52"         "Ptger4"        "Eif2ak2"       "Areg"         
## [253] "Rsad2"         "Slc31a1"       "Gm2000"        "Cox7c"        
## [257] "Tmem256"       "Cox5b"         "Cyba"          "Il18bp"       
## [261] "Selenow"       "Myl6"          "H2-DMa"        "Rai14"        
## [265] "Dab2"          "Hmox1"         "Tmsb4x"        "Ndufa2"       
## [269] "Cd68"          "Ccdc86"        "Atp6v1a"       "Cox7a2"       
## [273] "Calm1"         "Uqcrh"         "Socs2"         "Gpi1"         
## [277] "Ubl5"          "Colec12"       "Ifit3b"        "Scpep1"       
## [281] "S100a11"       "F3"            "Ms4a6d"        "Hacd2"        
## [285] "Chil3"         "Tuba1c"        "Rap2b"         "Gp49a"        
## [289] "Milr1"         "Fcgr1"         "Stat2"         "Atp5j2"       
## [293] "Uqcr10"        "Ssr4"          "Bcar3"         "Gm49368"      
## [297] "Tmem106a"      "Tubb5"         "Naca"          "Hspa8"        
## [301] "Atp5k"         "Bag1"          "Sec61b"        "Gyg"          
## [305] "Cox7b"         "Ly6c2"         "Top2a"         "Aldh2"        
## [309] "Dpp7"          "Eif3k"         "Cd48"          "C4b"          
## [313] "Pycard"        "Atp5l"         "Uqcr11"        "Osm"          
## [317] "Prelid1"       "Rnf213"        "Prdx4"         "Arpc1b"       
## [321] "Ndufv3"        "Sp110"         "Ndufa13"       "Abca1"        
## [325] "Gm1673"        "Wdr89"         "Sh3bgrl"       "Smdt1"        
## [329] "Gatm"          "Gpr84"         "Slamf8"        "Ccrl2"        
## [333] "Tomm7"         "Gas8"          "Ly6i"          "Bcl2a1d"      
## [337] "Esd"           "Dhx58"         "Ctsa"          "Rxrg"         
## [341] "Prdx6"         "Ndufc1"        "Polr2f"        "Sdc4"         
## [345] "Atp5j"         "Litaf"         "Atp6v0e"       "Cspg4"        
## [349] "Ranbp1"        "Ifi35"         "Fcer1g"        "Calm3"        
## [353] "Rhoc"          "Pde4b"         "Atp5g1"        "Gpx3"         
## [357] "Psmb1"         "Gpx1"          "Eef1a1"        "Ly9"          
## [361] "Igtp"          "H2-Q6"         "Herc6"         "Cd9"          
## [365] "Ran"           "Hebp1"         "Eno1"          "Cdk18"        
## [369] "Eef1b2"        "Gbp7"          "Glipr1"        "Atp6v1f"      
## [373] "H2-DMb1"       "Btf3"          "Slc25a3"       "Brdt"         
## [377] "Csf2ra"        "Eif3f"         "Hpse"          "Gm14305"      
## [381] "Gm14410"       "H2-Q4"         "Ndufb9"        "Epsti1"       
## [385] "Dnaaf3"        "Pf4"           "S100a4"        "Atp5h"        
## [389] "Apobec3"       "Hsp90ab1"      "Tnf"           "Ctss"         
## [393] "Gas6"          "Gbp3"          "Gng12"         "Nceh1"        
## [397] "Mgst1"         "Cfl1"          "Dtnbp1"        "Slc2a6"       
## [401] "Eif5a"         "Atp5c1"        "ptchd1"        "Ptchd1"       
## [405] "Psma7"         "Lap3"          "Rbm3"          "Cycs"         
## [409] "Cox6a2"        "Abcg1"         "Prr15"         "Ahnak"        
## [413] "Ndufb7"        "Nrp1"          "Lmnb1"         "Ninj1"        
## [417] "Mrpl33"        "Arpc3"         "Phlda1"        "Acp5"         
## [421] "Atp5g3"        "C5ar1"         "Arl5c"         "Parp9"        
## [425] "Ndufb8"        "Txndc17"       "Tbca"          "Gm49339"      
## [429] "Tma7"          "Fblim1"        "Eif3h"         "Psme2b"       
## [433] "Mrpl30"        "Arpc2"         "Ptma"          "Rac2"         
## [437] "Ctsh"          "Dcstamp"       "Clec4n"        "Dbi"          
## [441] "H2-T22"        "Sem1"          "Msr1"          "Bax"          
## [445] "S100a10"       "Fabp3"         "Snrpg"         "Bcl2a1"       
## [449] "Serpine2"      "Snrpd2"        "Cd180"         "Pgam1"        
## [453] "2310061I04Rik" "S100a1"        "Serpinb6a"     "Actg1"        
## [457] "Uqcc2"         "Tuba1b"        "Neurl2"        "Gcnt2"        
## [461] "Smc2"          "Atp5b"         "Ifih1"         "Nap1l1"       
## [465] "Cd274"         "Rgs1"          "Parp12"        "Serpine1"     
## [469] "Nsa2"          "Cebpb"         "Csf2rb"        "Cfb"          
## [473] "Ndufa6"        "Sdf2l1"        "Anxa4"         "Psma5"        
## [477] "Nfkbiz"        "Ctla2a"        "Atp5o"         "Ube2l3"       
## [481] "Lipa"          "Pfn1"          "Ndufa7"        "Ndufs6"       
## [485] "Psmb10"        "Mapkapk2"      "Aif1"          "Smc4"         
## [489] "Itgb1bp1"      "Nme2"          "Pfdn5"         "Rassf3"       
## [493] "1810058I24Rik" "Pgls"          "Clec4d"        "Mrpl54"       
## [497] "Tmem160"       "Pomp"          "Slc7a11"       "F10"
lapply(DAM.list, length)
## $top50
## [1] 50
## 
## $top100
## [1] 100
## 
## $top250
## [1] 250
## 
## $top500
## [1] 500
GEX.seur <- add_geneset_score(GEX.seur, DAM.list$top50, "DAM.sig_top50")
## Summarizing data
GEX.seur <- add_geneset_score(GEX.seur, DAM.list$top100, "DAM.sig_top100")
## Summarizing data
GEX.seur <- add_geneset_score(GEX.seur, DAM.list$top250, "DAM.sig_top250")
## Summarizing data
GEX.seur <- add_geneset_score(GEX.seur, DAM.list$top500, "DAM.sig_top500")
## Summarizing data
ppnew.2.v1 <- VlnPlot(GEX.seur, features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), 
                      group.by = "cnt", cols = color.cnt,
                      ncol = 2, pt.size = 0, raster = F) & geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.02)
ppnew.2.v1

ppnew.2.v1 <- VlnPlot(GEX.seur, features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), 
                      same.y.lims = T,
                      group.by = "cnt", cols = color.cnt,
                      ncol = 2, pt.size = 0, raster = F) & 
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) &
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("P06.CTR","P06.MIG"),
                                                  c("P06.MIG","P06.TDT"),
                                                  c("P06.CTR","P06.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.MIG"),
                                                  c("P30.PBS.MIG","P30.PBS.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.TDT"),
                                                  c("P06.TDT","P30.PBS.TDT")),
                               label.y = c(0.55,0.75,0.95,0.55,0.75,0.95,1.1),
                               size=3
                               )
ppnew.2.v1

ppnew.2a <- FeaturePlot(GEX.seur,features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), ncol = 4,
                        raster = T, pt.size = 3.5,
            keep.scale = "all")
ppnew.2a

mapal <- colorRampPalette(RColorBrewer::brewer.pal(4,"Spectral"))(120)
ppnew.2d <- FeaturePlot(GEX.seur,features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), ncol = 4,
                        raster = T, pt.size = 3.5,
            keep.scale = "all") & scale_color_gradientn(colors = rev(mapal))
ppnew.2d

#saveRDS(GEX.seur, "Spp1tdt.subset1_P06_P30PBS.sort1007.rds")

subset1 DEGs

GEX.comp <- GEX.seur
Idents(GEX.comp) <- "cnt"
GEX.comp
## An object of class Seurat 
## 17900 features across 12877 samples within 1 assay 
## Active assay: RNA (17900 features, 1090 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
GEX.comp$cnt[1:5]
## AAACCCAAGTCGGCCT-1 AAACCCAAGTCTGCGC-1 AAACCCACAAACTCGT-1 AAACCCACAGTAGAAT-1 
##            P06.TDT            P06.TDT        P30.PBS.MIG            P06.CTR 
## AAACCCACATCCGTGG-1 
##        P30.PBS.TDT 
## Levels: P06.CTR P06.MIG P06.TDT P30.PBS.CTR P30.PBS.MIG P30.PBS.TDT
DEGs.a <- list()


for(mm in c("P06","P30.PBS")){
  for(nn in c("CTR","MIG")){
    DEGs.a[[paste0(mm,".TDTvs",nn)]] <- FindAllMarkers(subset(GEX.comp, subset = cnt %in% c(paste0(mm,".",nn),paste0(mm,".TDT")) & age %in% c(mm)),
                                                       assay = "RNA",
                                                       only.pos = T,
                                                       min.pct = 0.05, 
                                                       logfc.threshold = 0.1,
                                                       test.use = "MAST")
    
     
  }
}
## Calculating cluster P06.CTR
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P06.TDT
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P06.MIG
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P06.TDT
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P30.PBS.CTR
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P30.PBS.TDT
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P30.PBS.MIG
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P30.PBS.TDT
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
#DEGs.a
names(DEGs.a)
## [1] "P06.TDTvsCTR"     "P06.TDTvsMIG"     "P30.PBS.TDTvsCTR" "P30.PBS.TDTvsMIG"
# save DEGs' table
df.DEGs.a <- data.frame()

for(nn in names(DEGs.a)){
  df.DEGs.a <- rbind(df.DEGs.a, data.frame(DEGs.a[[nn]], contrast = nn))
}

#write.csv(df.DEGs.a, "DEGs.subset1.csv")

DEG stat

cut.padj = 0.05
cut.log2FC = 0.3   
  
cut.pct1 = 0.05

df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
##           contrast     cluster up.DEGs
## 1     P06.TDTvsCTR     P06.CTR      14
## 2     P06.TDTvsCTR     P06.TDT      18
## 3     P06.TDTvsMIG     P06.TDT      14
## 4     P06.TDTvsMIG     P06.MIG       3
## 5 P30.PBS.TDTvsCTR P30.PBS.CTR       2
## 6 P30.PBS.TDTvsCTR P30.PBS.TDT       7
## 7 P30.PBS.TDTvsMIG P30.PBS.TDT       1
## 8 P30.PBS.TDTvsMIG P30.PBS.MIG       3
pp.stat.DEG <- list()
df.DEGs.a$cluster <- factor(as.character(df.DEGs.a$cluster),
                            levels = levels(GEX.seur$cnt))
pp.stat.DEG[["a"]] <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=contrast, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>0.05, padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Number of DEGs") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))
pp.stat.DEG[["a"]]

cut.padj = 0.05
cut.log2FC = 0.1   
  
cut.pct1 = 0.05

df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
##           contrast     cluster up.DEGs
## 1     P06.TDTvsCTR     P06.CTR     174
## 2     P06.TDTvsCTR     P06.TDT      84
## 3     P06.TDTvsMIG     P06.MIG      70
## 4     P06.TDTvsMIG     P06.TDT      61
## 5 P30.PBS.TDTvsCTR P30.PBS.CTR      28
## 6 P30.PBS.TDTvsCTR P30.PBS.TDT      35
## 7 P30.PBS.TDTvsMIG P30.PBS.MIG      21
## 8 P30.PBS.TDTvsMIG P30.PBS.TDT       5
pp.stat.DEG[["a"]] <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=contrast, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>0.05, padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Number of DEGs") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))
pp.stat.DEG[["a"]]

DEG plot

pp.DEGs.a <- lapply(DEGs.a, function(x){NA})
names(DEGs.a)
## [1] "P06.TDTvsCTR"     "P06.TDTvsMIG"     "P30.PBS.TDTvsCTR" "P30.PBS.TDTvsMIG"
DEGs.a.combine <- DEGs.a
DEGs.a.combine <- lapply(DEGs.a.combine, function(x){
  for(zz in c("P06.MIG","P06.CTR","P30.PBS.MIG","P30.PBS.CTR")){
    x[x$cluster==zz,"avg_log2FC"] <- -x[x$cluster==zz,"avg_log2FC"]
  }
  
  x
})
pp.DEGs.a$P06.TDTvsCTR <- DEGs.a.combine$P06.TDTvsCTR %>%
  mutate(label=ifelse(((p_val_adj < 1e-8 & avg_log2FC<0) | (p_val_adj < 1e-8 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTR",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTR"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P06, TDT vs CTR") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") + xlim(c(-1,2))
pp.DEGs.a$P06.TDTvsCTR

pp.DEGs.a$P06.TDTvsMIG <- DEGs.a.combine$P06.TDTvsMIG %>%
  mutate(label=ifelse(((p_val_adj < 1e-5 & avg_log2FC<0) | (p_val_adj < 1e-5 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"MIG",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("MIG"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P06, TDT vs MIG") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") + xlim(c(-1,1))
pp.DEGs.a$P06.TDTvsMIG

pp.DEGs.a$P30.PBS.TDTvsCTR <- DEGs.a.combine$P30.PBS.TDTvsCTR %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTR",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTR"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P30.PBS, TDT vs CTR") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-1,2))
pp.DEGs.a$P30.PBS.TDTvsCTR

pp.DEGs.a$P30.PBS.TDTvsMIG <- DEGs.a.combine$P30.PBS.TDTvsMIG %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"MIG",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("MIG"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P30.PBS, TDT vs MIG") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-1,2))
pp.DEGs.a$P30.PBS.TDTvsMIG

new clusters

GEX.seur$new_clusters <- as.character(GEX.seur$seurat_clusters)

GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(4)] <- "C1"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(3,6)] <- "C2"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(5)] <- "C3"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(7)] <- "C4"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(11)] <- "C5"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(12)] <- "C6"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(10)] <- "C7"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(9)] <- "C8"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(0)] <- "C9"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(1,2)] <- "C10"
GEX.seur$new_clusters[GEX.seur$new_clusters %in% c(8)] <- "C11"

GEX.seur$new_clusters <- factor(GEX.seur$new_clusters,
                                paste0("C",1:11))
head(GEX.seur@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCAAGTCGGCCT-1    Spp1tdt       5529         2163   5.498282  17.887502
## AAACCCAAGTCTGCGC-1    Spp1tdt       8466         2389   3.874321  26.836759
## AAACCCACAAACTCGT-1    Spp1tdt       4188         1841   2.960840   8.572111
## AAACCCACAGTAGAAT-1    Spp1tdt       6855         2585   2.727936  14.529540
## AAACCCACATCCGTGG-1    Spp1tdt       2337         1225   3.979461   7.659392
## AAACCCAGTTACGCCG-1    Spp1tdt       2915         1382   1.234991  13.687822
##                        FB.info     S.Score    G2M.Score Phase RNA_snn_res.1.5
## AAACCCAAGTCGGCCT-1     P06.TDT -0.04932171 -0.114069796    G1              12
## AAACCCAAGTCTGCGC-1     P06.TDT -0.02781008 -0.083520479    G1              12
## AAACCCACAAACTCGT-1 P30.PBS.MIG  0.01921373  0.060314405   G2M               0
## AAACCCACAGTAGAAT-1     P06.CTR  0.45758583  0.005436139     S               8
## AAACCCACATCCGTGG-1 P30.PBS.TDT  0.04897564 -0.042116708     S               2
## AAACCCAGTTACGCCG-1 P30.PBS.TDT -0.01777409 -0.043560404    G1               1
##                    seurat_clusters pANN_0.25_0.005_1191 DoubletFinder0.05
## AAACCCAAGTCGGCCT-1               5          0.106918239           Singlet
## AAACCCAAGTCTGCGC-1               5          0.044025157           Singlet
## AAACCCACAAACTCGT-1               1          0.006289308           Singlet
## AAACCCACAGTAGAAT-1               4          0.377358491           Singlet
## AAACCCACATCCGTGG-1               1          0.000000000           Singlet
## AAACCCAGTTACGCCG-1               0          0.000000000           Singlet
##                    pANN_0.25_0.005_2383 DoubletFinder0.1 sort_clusters     age
## AAACCCAAGTCGGCCT-1          0.113207547          Singlet             5     P06
## AAACCCAAGTCTGCGC-1          0.044025157          Singlet             5     P06
## AAACCCACAAACTCGT-1          0.006289308          Singlet             1 P30.PBS
## AAACCCACAGTAGAAT-1          0.364779874          Doublet             4     P06
## AAACCCACATCCGTGG-1          0.000000000          Singlet             1 P30.PBS
## AAACCCAGTTACGCCG-1          0.000000000          Singlet             0 P30.PBS
##                            cnt   preAnno RNA_snn_res.1.2 RNA_snn_res.1
## AAACCCAAGTCGGCCT-1     P06.TDT Microglia              11             5
## AAACCCAAGTCTGCGC-1     P06.TDT Microglia              11             5
## AAACCCACAAACTCGT-1 P30.PBS.MIG Microglia               3             1
## AAACCCACAGTAGAAT-1     P06.CTR Microglia               2             4
## AAACCCACATCCGTGG-1 P30.PBS.TDT Microglia               3             1
## AAACCCAGTTACGCCG-1 P30.PBS.TDT Microglia               0             0
##                    DAM.sig_top50 DAM.sig_top100 DAM.sig_top250 DAM.sig_top500
## AAACCCAAGTCGGCCT-1    0.15167590   -0.017812540     0.02914850     0.20464023
## AAACCCAAGTCTGCGC-1    0.39501325    0.269974027     0.23830051     0.38608163
## AAACCCACAAACTCGT-1    0.06770196    0.077388291     0.03449641     0.16261439
## AAACCCACAGTAGAAT-1   -0.16981972   -0.108029609    -0.06343684     0.07254075
## AAACCCACATCCGTGG-1    0.14346077    0.008080233    -0.01448075     0.11958677
## AAACCCAGTTACGCCG-1   -0.10833692   -0.041742952    -0.01268726     0.17698730
##                    new_clusters    age1      FB.new     cluster_cnt
## AAACCCAAGTCGGCCT-1           C3     P06     P06.TDT      C3_P06.TDT
## AAACCCAAGTCTGCGC-1           C3     P06     P06.TDT      C3_P06.TDT
## AAACCCACAAACTCGT-1          C10 P30.PBS P30.PBS.MIG C10_P30.PBS.MIG
## AAACCCACAGTAGAAT-1           C1     P06     P06.CTR      C1_P06.CTR
## AAACCCACATCCGTGG-1          C10 P30.PBS P30.PBS.TDT C10_P30.PBS.TDT
## AAACCCAGTTACGCCG-1           C9 P30.PBS P30.PBS.TDT  C9_P30.PBS.TDT
#
GEX.seur$age1 <- factor(as.character(GEX.seur$age),
                        levels = c("P30.PBS","P06"))

#
GEX.seur$FB.new <- factor(as.character(GEX.seur$FB.info),
                          levels = rev(levels(GEX.seur$FB.info))[c(1:3,8:10)])

color.FBnew <- rev(color.FB1)

new markers

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "new_clusters"

#GEX.markers.new <- FindAllMarkers(GEX.seur, only.pos = TRUE, 
#                                  min.pct = 0.05,
#                                  test.use = "MAST",
#                                  logfc.threshold = 0.25)
GEX.markers.new <- read.table("sc10x_LYN.0921.marker.subset1_new.csv", header = TRUE, sep = ",")
GEX.markers.new %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
## # A tibble: 88 x 7
## # Groups:   cluster [11]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene    
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <chr>   <chr>   
##  1 1.97e-320      0.774 0.999 0.913 3.53e-316 C1      Cfl1    
##  2 2.15e-215      0.877 0.628 0.225 3.84e-211 C1      Igfbp4  
##  3 7.74e-214      0.630 0.999 0.944 1.39e-209 C1      Rps26   
##  4 5.89e-162      0.762 0.623 0.266 1.05e-157 C1      Emp3    
##  5 1.12e-155      0.660 0.884 0.569 2.01e-151 C1      Rbm3    
##  6 1.01e-142      0.731 0.563 0.236 1.81e-138 C1      Lsp1    
##  7 3.97e-117      0.612 0.659 0.348 7.10e-113 C1      Ramp1   
##  8 2.22e- 84      0.719 0.528 0.273 3.98e- 80 C1      Ifi27l2a
##  9 0              0.674 1     1     0         C2      Fau     
## 10 0              0.649 1     0.984 0         C2      Rps15a  
## # ... with 78 more rows
markers.new <- GEX.markers.new
markers.new$cluster <- factor(as.character(GEX.markers.new$cluster),
                               levels = levels(GEX.seur$new_clusters))

markers.new_t60 <- (markers.new %>% group_by(cluster) %>% 
                  filter(pct.1>0.1 & gene %in% grep("Rps|Rpl|Gm|Rik$|Xist|Tsix",markers.new$gene,invert = T,value = T)) %>%
                   top_n(n = 60, wt = avg_log2FC) %>%
                   ungroup() %>%
  #arrange(desc(avg_log2FC*pct.1),gene) %>%
    arrange(desc(pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
DotPlot(GEX.seur, group.by = "new_clusters", features = rev(markers.new_t60[1:64]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "new_clusters", features = rev(markers.new_t60[65:128]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "new_clusters", features = rev(markers.new_t60[129:192]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "new_clusters", features = rev(markers.new_t60[193:256]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "new_clusters", features = rev(markers.new_t60[257:320]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "new_clusters", features = rev(markers.new_t60[321:384]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "new_clusters", features = rev(markers.new_t60[385:457]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

plot1

umap in clusters

pp.umap.1a <- DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters", raster = F, pt.size = 0.3
                      ) +
           DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.new", cols = color.FBnew, raster = F, pt.size = 0.3
                   )
pp.umap.1a

pp.umap.1b <- DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "new_clusters", repel = F, raster = F, pt.size = 0.3
                      ) +
           DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.new", cols = color.FBnew, raster = F, pt.size = 0.3
                   )
pp.umap.1b

plot2

pp.umap.2a <- DimPlot(GEX.seur, label = F,  repel = F, reduction = 'umap', group.by = "FB.new", split.by = "FB.new", raster = F, pt.size = 0.3,
        ncol = 3, cols = color.FBnew)
pp.umap.2a

pp.umap.2b <- DimPlot(GEX.seur, label = F,repel = F, reduction = 'umap', group.by = "FB.new", split.by = "age", 
                     raster = F, pt.size = 0.3,
                     ncol = 3, cols = color.FBnew)
pp.umap.2b

pp.umap.2c <- DimPlot(GEX.seur, label = F,  repel = F, reduction = 'umap', group.by = "new_clusters", split.by = "FB.new", raster = F, pt.size = 0.3,
        ncol = 3)
pp.umap.2c

pp.umap.2d <- DimPlot(GEX.seur, label = F,repel = F, reduction = 'umap', group.by = "new_clusters", split.by = "age", 
                     raster = F, pt.size = 0.3,
                     ncol = 3)
pp.umap.2d

plot3

composition

pp.comp.3a <- cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.new,
      clusters=GEX.seur$new_clusters),
                   main = "Cell Count",
                   gaps_row = c(3),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.new,
                                clusters=GEX.seur$new_clusters)),
                   main = "Cell Ratio",
                   gaps_row = c(3),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
pp.comp.3a

stat_sort <- GEX.seur@meta.data[,c("FB.new","new_clusters")]
stat_sort.s <- stat_sort %>%
  group_by(FB.new,new_clusters) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup()
## `summarise()` has grouped output by 'FB.new'. You can override using the
## `.groups` argument.
dim(stat_sort.s)
## [1] 63  4
#stat_sort.s
pp.comp.3b <- stat_sort.s %>% 
  ggplot(aes(x = new_clusters, y = 100*prop, color=FB.new)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  theme_classic(base_size = 15) + 
  scale_color_manual(values = color.FBnew, name="") +
  labs(title="Cell Composition", y="Proportion") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x = new_clusters, y = 100*prop, color=FB.new),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=1.75,
             stroke=0.15, show.legend = F)
pp.comp.3b

plot4

final.markers <- c("Apoe","Ms4a6d","Ms4a6c","Apoc1",
                   "Apoc2","Fn1","Tspo","Ctss",
                   "Ctsc","Ctsd","Igf1","Sparc",
                   "Siglech","P2ry12","P2ry13","Tmem119",
                   "Sall1","Slc2a5","Gpr34","Maf")
length(final.markers)
## [1] 20
sum(final.markers %in% markers.new$gene)
## [1] 19
final.markers.sort <- (markers.new %>% group_by(cluster) %>%
                         filter(gene %in% final.markers) %>%
                         ungroup() %>%
                         arrange(desc(avg_log2FC*pct.1),gene) %>%
                         distinct(gene, .keep_all = TRUE) %>%
                         arrange(cluster, p_val_adj))$gene
final.markers.sort
##  [1] "Sparc"   "Apoe"    "Apoc1"   "Ctss"    "Apoc2"   "Ms4a6c"  "Igf1"   
##  [8] "Tspo"    "Ctsc"    "Fn1"     "Gpr34"   "P2ry13"  "Tmem119" "P2ry12" 
## [15] "Slc2a5"  "Siglech" "Sall1"   "Ms4a6d"  "Ctsd"
setdiff(final.markers,final.markers.sort)
## [1] "Maf"
final.markers.new <- c(final.markers.sort[1:14],"Maf",final.markers.sort[15:19])
mapal <- colorRampPalette(RColorBrewer::brewer.pal(4,"Spectral"))(120)
pp.dot.4a1 <- DotPlot(GEX.seur, features = final.markers, group.by = "new_clusters"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters")
pp.dot.4a1

pp.dot.4a1 <- DotPlot(GEX.seur, features = final.markers.sort, group.by = "new_clusters"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters")
pp.dot.4a1

pp.dot.4a1 <- DotPlot(GEX.seur, features = final.markers.new, group.by = "new_clusters"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters")
pp.dot.4a1

pp.dot.4a2 <- DotPlot(GEX.seur, features = final.markers.new, group.by = "new_clusters"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters")  +
  scale_color_gradientn(colours  = rev(mapal))
pp.dot.4a2

pp.dot.4b1 <- DotPlot(GEX.seur, features = final.markers.new, group.by = "FB.new"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Conditions")
pp.dot.4b1

pp.dot.4b2 <- DotPlot(GEX.seur, features = final.markers.new, group.by = "FB.new"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Conditions")  +
  scale_color_gradientn(colours  = rev(mapal))
pp.dot.4b2

levels.cluster_cnt <- as.vector(unlist(sapply(levels(GEX.seur$new_clusters),function(x){
                         paste0(x,"_",levels(GEX.seur$FB.new))
                      })))
levels.cluster_cnt
##  [1] "C1_P06.CTR"      "C1_P06.MIG"      "C1_P06.TDT"      "C1_P30.PBS.CTR" 
##  [5] "C1_P30.PBS.MIG"  "C1_P30.PBS.TDT"  "C2_P06.CTR"      "C2_P06.MIG"     
##  [9] "C2_P06.TDT"      "C2_P30.PBS.CTR"  "C2_P30.PBS.MIG"  "C2_P30.PBS.TDT" 
## [13] "C3_P06.CTR"      "C3_P06.MIG"      "C3_P06.TDT"      "C3_P30.PBS.CTR" 
## [17] "C3_P30.PBS.MIG"  "C3_P30.PBS.TDT"  "C4_P06.CTR"      "C4_P06.MIG"     
## [21] "C4_P06.TDT"      "C4_P30.PBS.CTR"  "C4_P30.PBS.MIG"  "C4_P30.PBS.TDT" 
## [25] "C5_P06.CTR"      "C5_P06.MIG"      "C5_P06.TDT"      "C5_P30.PBS.CTR" 
## [29] "C5_P30.PBS.MIG"  "C5_P30.PBS.TDT"  "C6_P06.CTR"      "C6_P06.MIG"     
## [33] "C6_P06.TDT"      "C6_P30.PBS.CTR"  "C6_P30.PBS.MIG"  "C6_P30.PBS.TDT" 
## [37] "C7_P06.CTR"      "C7_P06.MIG"      "C7_P06.TDT"      "C7_P30.PBS.CTR" 
## [41] "C7_P30.PBS.MIG"  "C7_P30.PBS.TDT"  "C8_P06.CTR"      "C8_P06.MIG"     
## [45] "C8_P06.TDT"      "C8_P30.PBS.CTR"  "C8_P30.PBS.MIG"  "C8_P30.PBS.TDT" 
## [49] "C9_P06.CTR"      "C9_P06.MIG"      "C9_P06.TDT"      "C9_P30.PBS.CTR" 
## [53] "C9_P30.PBS.MIG"  "C9_P30.PBS.TDT"  "C10_P06.CTR"     "C10_P06.MIG"    
## [57] "C10_P06.TDT"     "C10_P30.PBS.CTR" "C10_P30.PBS.MIG" "C10_P30.PBS.TDT"
## [61] "C11_P06.CTR"     "C11_P06.MIG"     "C11_P06.TDT"     "C11_P30.PBS.CTR"
## [65] "C11_P30.PBS.MIG" "C11_P30.PBS.TDT"
GEX.seur$cluster_cnt <- factor(paste0(as.character(GEX.seur$new_clusters),
                                      "_",
                                      as.character(GEX.seur$FB.new)),
                               levels = levels.cluster_cnt)
pp.dot.4c1 <- DotPlot(GEX.seur, features = final.markers.new, group.by = "cluster_cnt"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters_Conditions")
pp.dot.4c1

pp.dot.4c1a <- DotPlot(subset(GEX.seur,subset= age %in% c("P06") & new_clusters %in% paste0("C",1:8)), 
                       features = final.markers.new, group.by = "cluster_cnt"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters_Conditions")
pp.dot.4c1a

pp.dot.4c1b <- DotPlot(subset(GEX.seur,subset= age %in% c("P30.PBS") & new_clusters %in% paste0("C",8:11)), 
                       features = final.markers.new, group.by = "cluster_cnt"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters_Conditions")
pp.dot.4c1b

pp.dot.4c2 <- DotPlot(GEX.seur, features = final.markers.new, group.by = "cluster_cnt"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters_Conditions")  +
  scale_color_gradientn(colours  = rev(mapal))
pp.dot.4c2

pp.dot.4c2a <- DotPlot(subset(GEX.seur,subset= age %in% c("P06") & new_clusters %in% paste0("C",1:8)), 
                       features = final.markers.new, group.by = "cluster_cnt"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters_Conditions") +
  scale_color_gradientn(colours  = rev(mapal))
pp.dot.4c2a

pp.dot.4c2b <- DotPlot(subset(GEX.seur,subset= age %in% c("P30.PBS") & new_clusters %in% paste0("C",8:11)), 
                       features = final.markers.new, group.by = "cluster_cnt"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters_Conditions") +
  scale_color_gradientn(colours  = rev(mapal))
pp.dot.4c2b

plot5

violin using same markers in plot4

pp.violin.5a1 <- VlnPlot(GEX.seur, features = final.markers.new, group.by = "new_clusters", 
                      ncol = 2, pt.size = 0, raster = F) & geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.02)
pp.violin.5a1

pp.violin.5a2 <- VlnPlot(GEX.seur, features = final.markers.new, group.by = "new_clusters", 
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55)
pp.violin.5a2

pp.violin.5b1 <- VlnPlot(GEX.seur, features = final.markers.new, group.by = "FB.new", cols = color.FBnew,
                      ncol = 2, pt.size = 0, raster = F) & geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.02)
pp.violin.5b1

pp.violin.5b2 <- VlnPlot(GEX.seur, features = final.markers.new, group.by = "FB.new", cols = color.FBnew,
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55)
pp.violin.5b2

pp.violin.5b3 <- VlnPlot(GEX.seur, features = final.markers.new, group.by = "FB.new", cols = color.FBnew,
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55) & ylim(c(0,7.2)) &
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("P06.CTR","P06.MIG"),
                                                  c("P06.MIG","P06.TDT"),
                                                  c("P06.CTR","P06.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.MIG"),
                                                  c("P30.PBS.MIG","P30.PBS.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.TDT"),
                                                  c("P06.TDT","P30.PBS.TDT")),
                               label.y = c(6.3,6.8,7.3,6.3,6.8,7.3,7.8)-1,
                               size=1.75
                               )
pp.violin.5b3

pp.violin.5c1a <- VlnPlot(subset(GEX.seur,subset= age %in% c("P06") & new_clusters %in% paste0("C",1:8)), 
                          features = final.markers.new, group.by = "cluster_cnt", cols = rep(color.FBnew[1:3],8),
                      ncol = 2, pt.size = 0, raster = F) & geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.02)
pp.violin.5c1a

pp.violin.5c1b <- VlnPlot(subset(GEX.seur,subset= age %in% c("P30.PBS") & new_clusters %in% paste0("C",8:11)), 
                          features = final.markers.new, group.by = "cluster_cnt", cols = rep(color.FBnew[4:6],4),
                      ncol = 2, pt.size = 0, raster = F) & geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.02)
pp.violin.5c1b

pp.violin.5c2a <- VlnPlot(subset(GEX.seur,subset= age %in% c("P06") & new_clusters %in% paste0("C",1:8)), 
                          features = final.markers.new, group.by = "cluster_cnt", cols = rep(color.FBnew[1:3],8),
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55)
pp.violin.5c2a

pp.violin.5c2b <- VlnPlot(subset(GEX.seur,subset= age %in% c("P30.PBS") & new_clusters %in% paste0("C",8:11)), 
                          features = final.markers.new, group.by = "cluster_cnt", cols = rep(color.FBnew[4:6],4),
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55)
pp.violin.5c2b

contrast.a <- list(c("P06.CTR","P06.MIG"),
                   c("P06.MIG","P06.TDT"),
                   c("P06.CTR","P06.TDT"))

plot.contrast.a <- list()

for(XXX in paste0("C",1:8)){
  
  
  plot.contrast.a <- c(plot.contrast.a,
                       lapply(contrast.a, function(x){
                         paste0(XXX,
                                "_",
                                x)
                       }))
}

plot.labely.a <- rep(c(6.3,6.8,7.3),8)
#
length(plot.contrast.a)
## [1] 24
plot.contrast.a[1:6]
## [[1]]
## [1] "C1_P06.CTR" "C1_P06.MIG"
## 
## [[2]]
## [1] "C1_P06.MIG" "C1_P06.TDT"
## 
## [[3]]
## [1] "C1_P06.CTR" "C1_P06.TDT"
## 
## [[4]]
## [1] "C2_P06.CTR" "C2_P06.MIG"
## 
## [[5]]
## [1] "C2_P06.MIG" "C2_P06.TDT"
## 
## [[6]]
## [1] "C2_P06.CTR" "C2_P06.TDT"
contrast.b <- list(c("P30.PBS.CTR","P30.PBS.MIG"),
                   c("P30.PBS.MIG","P30.PBS.TDT"),
                   c("P30.PBS.CTR","P30.PBS.TDT"))

plot.contrast.b <- list()

for(XXX in paste0("C",8:11)){
  
  
  plot.contrast.b <- c(plot.contrast.b,
                       lapply(contrast.b, function(x){
                         paste0(XXX,
                                "_",
                                x)
                       }))
}

plot.labely.b <- rep(c(6.3,6.8,7.3),4)
#
length(plot.contrast.b)
## [1] 12
plot.contrast.b[1:6]
## [[1]]
## [1] "C8_P30.PBS.CTR" "C8_P30.PBS.MIG"
## 
## [[2]]
## [1] "C8_P30.PBS.MIG" "C8_P30.PBS.TDT"
## 
## [[3]]
## [1] "C8_P30.PBS.CTR" "C8_P30.PBS.TDT"
## 
## [[4]]
## [1] "C9_P30.PBS.CTR" "C9_P30.PBS.MIG"
## 
## [[5]]
## [1] "C9_P30.PBS.MIG" "C9_P30.PBS.TDT"
## 
## [[6]]
## [1] "C9_P30.PBS.CTR" "C9_P30.PBS.TDT"
pp.violin.5c3a <- VlnPlot(subset(GEX.seur,subset= age %in% c("P06") & new_clusters %in% paste0("C",1:8)), 
                          features = final.markers.new, group.by = "cluster_cnt", cols = rep(color.FBnew[1:3],8),
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55) & ylim(c(0,6.8)) &
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = plot.contrast.a,
                               label.y = plot.labely.a-1,
                               size=1.5
                               ) 
pp.violin.5c3a

pp.violin.5c3b <- VlnPlot(subset(GEX.seur,subset= age %in% c("P30.PBS") & new_clusters %in% paste0("C",8:11)), 
                          features = final.markers.new, group.by = "cluster_cnt", cols = rep(color.FBnew[4:6],4),
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55) & ylim(c(0,6.8)) &
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = plot.contrast.b,
                               label.y = plot.labely.b-1,
                               size=1.75
                               )
pp.violin.5c3b

plot6

DAM score, using ready-plot above

pp.feat.6a1 <- FeaturePlot(GEX.seur,features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), ncol = 4,
                           raster = T, pt.size = 3.5#,keep.scale = "all"
                           )
pp.feat.6a1

mapal <- colorRampPalette(RColorBrewer::brewer.pal(4,"Spectral"))(120)
pp.feat.6a2 <- FeaturePlot(GEX.seur,features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), ncol = 4,
                        raster = T, pt.size = 3.5,
            keep.scale = "all") & scale_color_gradientn(colors = rev(mapal))
pp.feat.6a2

pp.feat.6b1 <- VlnPlot(GEX.seur, features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), 
                      group.by = "cnt", cols = color.cnt,
                      ncol = 2, pt.size = 0, raster = F) & geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.02)
pp.feat.6b1

pp.feat.6b2 <- VlnPlot(GEX.seur, features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), 
                      same.y.lims = T,
                      group.by = "cnt", cols = color.cnt,
                      ncol = 2, pt.size = 0, raster = F) & 
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & 
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("P06.CTR","P06.MIG"),
                                                  c("P06.MIG","P06.TDT"),
                                                  c("P06.CTR","P06.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.MIG"),
                                                  c("P30.PBS.MIG","P30.PBS.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.TDT"),
                                                  c("P06.TDT","P30.PBS.TDT")),
                               label.y = c(0.6,0.75,0.9,0.6,0.75,0.9,1.05),
                               size=2.35
                               )
pp.feat.6b2

pp.feat.6c1 <- VlnPlot(GEX.seur, features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), 
                      group.by = "new_clusters", #cols = color.cnt,
                      ncol = 2, pt.size = 0, raster = F) & geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.02)
pp.feat.6c1

pp.feat.6c2 <- VlnPlot(GEX.seur, features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), 
                      group.by = "new_clusters", #cols = color.cnt,
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
pp.feat.6c2

pp.feat.6c3a <- VlnPlot(subset(GEX.seur,subset= age %in% c("P06") & new_clusters %in% paste0("C",1:8)), 
                          features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), 
                        group.by = "cluster_cnt", cols = rep(color.FBnew[1:3],8),
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55) & ylim(c(-0.25,0.88)) &
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = plot.contrast.a,
                               label.y = (plot.labely.a-1)/8,
                               size=1.5
                               ) 
pp.feat.6c3a

pp.feat.6c3b <- VlnPlot(subset(GEX.seur,subset= age %in% c("P30.PBS") & new_clusters %in% paste0("C",8:11)), 
                          features = c("DAM.sig_top50","DAM.sig_top100","DAM.sig_top250","DAM.sig_top500"), 
                        group.by = "cluster_cnt", cols = rep(color.FBnew[4:6],4),
                      ncol = 2, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55) & ylim(c(-0.25,0.9)) &
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = plot.contrast.b,
                               label.y = (plot.labely.b-1)/7.5,
                               size=1.75
                               )
pp.feat.6c3b

plot7

count DEG number, using different cutoffs

cut.padj = 0.05
cut.log2FC = 0.1   
  
cut.pct1 = 0.05

stat.a1 <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
stat.a1
##           contrast     cluster up.DEGs
## 1     P06.TDTvsCTR     P06.CTR     174
## 2     P06.TDTvsCTR     P06.TDT      84
## 3     P06.TDTvsMIG     P06.MIG      70
## 4     P06.TDTvsMIG     P06.TDT      61
## 5 P30.PBS.TDTvsCTR P30.PBS.CTR      28
## 6 P30.PBS.TDTvsCTR P30.PBS.TDT      35
## 7 P30.PBS.TDTvsMIG P30.PBS.MIG      21
## 8 P30.PBS.TDTvsMIG P30.PBS.TDT       5
pp.stat.DEG[["a1"]] <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=contrast, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>0.05, padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Number of DEGs") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))
pp.stat.DEG[["a1"]]

cut.padj = 0.05
cut.log2FC = 0.2   
  
cut.pct1 = 0.05

stat.a2 <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
stat.a2
##           contrast     cluster up.DEGs
## 1     P06.TDTvsCTR     P06.CTR      62
## 2     P06.TDTvsCTR     P06.TDT      52
## 3     P06.TDTvsMIG     P06.MIG      11
## 4     P06.TDTvsMIG     P06.TDT      31
## 5 P30.PBS.TDTvsCTR P30.PBS.CTR       8
## 6 P30.PBS.TDTvsCTR P30.PBS.TDT      17
## 7 P30.PBS.TDTvsMIG P30.PBS.MIG       9
## 8 P30.PBS.TDTvsMIG P30.PBS.TDT       4
pp.stat.DEG[["a2"]] <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=contrast, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>0.05, padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Number of DEGs") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))
pp.stat.DEG[["a2"]]

cut.padj = 0.05
cut.log2FC = 0.3   
  
cut.pct1 = 0.05

stat.a3 <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
stat.a3
##           contrast     cluster up.DEGs
## 1     P06.TDTvsCTR     P06.CTR      14
## 2     P06.TDTvsCTR     P06.TDT      18
## 3     P06.TDTvsMIG     P06.MIG       3
## 4     P06.TDTvsMIG     P06.TDT      14
## 5 P30.PBS.TDTvsCTR P30.PBS.CTR       2
## 6 P30.PBS.TDTvsCTR P30.PBS.TDT       7
## 7 P30.PBS.TDTvsMIG P30.PBS.MIG       3
## 8 P30.PBS.TDTvsMIG P30.PBS.TDT       1
pp.stat.DEG[["a3"]] <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=contrast, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>0.05, padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Number of DEGs") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))
pp.stat.DEG[["a3"]]

DEG list

cut.padj = 0.05
cut.log2FC = 0.1   
  
cut.pct1 = 0.05

stat.a1.df <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  #summarise(up.DEGs = n()) %>% 
  as.data.frame()
dim(stat.a1.df)
## [1] 478   8
stat.a1.df[1:6,]
##           p_val avg_log2FC pct.1 pct.2     p_val_adj cluster          gene
## 1 4.680399e-144  0.7351005 0.346 0.051 8.377914e-140 P06.CTR tdTomato-head
## 2  8.887603e-68  0.2127752 1.000 1.000  1.590881e-63 P06.CTR        Tmsb4x
## 3  2.787862e-66  0.2497754 1.000 1.000  4.990272e-62 P06.CTR          Actb
## 4  4.522767e-58  0.2936516 1.000 0.999  8.095754e-54 P06.CTR         Sparc
## 5  5.861724e-47  0.4937834 0.627 0.421  1.049249e-42 P06.CTR         Ramp1
## 6  4.523598e-44  0.3378062 0.985 0.954  8.097241e-40 P06.CTR           Maf
##       contrast
## 1 P06.TDTvsCTR
## 2 P06.TDTvsCTR
## 3 P06.TDTvsCTR
## 4 P06.TDTvsCTR
## 5 P06.TDTvsCTR
## 6 P06.TDTvsCTR
cut.padj = 0.05
cut.log2FC = 0.2   
  
cut.pct1 = 0.05

stat.a2.df <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  #summarise(up.DEGs = n()) %>% 
  as.data.frame()
dim(stat.a2.df)
## [1] 194   8
stat.a2.df[1:6,]
##           p_val avg_log2FC pct.1 pct.2     p_val_adj cluster          gene
## 1 4.680399e-144  0.7351005 0.346 0.051 8.377914e-140 P06.CTR tdTomato-head
## 2  8.887603e-68  0.2127752 1.000 1.000  1.590881e-63 P06.CTR        Tmsb4x
## 3  2.787862e-66  0.2497754 1.000 1.000  4.990272e-62 P06.CTR          Actb
## 4  4.522767e-58  0.2936516 1.000 0.999  8.095754e-54 P06.CTR         Sparc
## 5  5.861724e-47  0.4937834 0.627 0.421  1.049249e-42 P06.CTR         Ramp1
## 6  4.523598e-44  0.3378062 0.985 0.954  8.097241e-40 P06.CTR           Maf
##       contrast
## 1 P06.TDTvsCTR
## 2 P06.TDTvsCTR
## 3 P06.TDTvsCTR
## 4 P06.TDTvsCTR
## 5 P06.TDTvsCTR
## 6 P06.TDTvsCTR
cut.padj = 0.05
cut.log2FC = 0.3   
  
cut.pct1 = 0.05

stat.a3.df <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(contrast,cluster) %>%
  #summarise(up.DEGs = n()) %>% 
  as.data.frame()
dim(stat.a3.df)
## [1] 62  8
stat.a3.df[1:6,]
##           p_val avg_log2FC pct.1 pct.2     p_val_adj cluster          gene
## 1 4.680399e-144  0.7351005 0.346 0.051 8.377914e-140 P06.CTR tdTomato-head
## 2  5.861724e-47  0.4937834 0.627 0.421  1.049249e-42 P06.CTR         Ramp1
## 3  4.523598e-44  0.3378062 0.985 0.954  8.097241e-40 P06.CTR           Maf
## 4  1.317442e-43  0.3008473 0.996 0.994  2.358222e-39 P06.CTR        P2ry12
## 5  1.189013e-40  0.3307153 0.977 0.933  2.128334e-36 P06.CTR           Ckb
## 6  3.892612e-36  0.4690318 0.482 0.305  6.967776e-32 P06.CTR          Lsp1
##       contrast
## 1 P06.TDTvsCTR
## 2 P06.TDTvsCTR
## 3 P06.TDTvsCTR
## 4 P06.TDTvsCTR
## 5 P06.TDTvsCTR
## 6 P06.TDTvsCTR
#
stat.a1.list <- list(P06.TDTvsCTR.CTRup = (stat.a1.df %>% filter(contrast == "P06.TDTvsCTR" & cluster == "P06.CTR"))$gene,
                     P06.TDTvsCTR.TDTup = (stat.a1.df %>% filter(contrast == "P06.TDTvsCTR" & cluster == "P06.TDT"))$gene,
                     P06.TDTvsMIG.MIGup = (stat.a1.df %>% filter(contrast == "P06.TDTvsMIG" & cluster == "P06.MIG"))$gene,
                     P06.TDTvsMIG.TDTup = (stat.a1.df %>% filter(contrast == "P06.TDTvsMIG" & cluster == "P06.TDT"))$gene,
                     P30.PBS.TDTvsCTR.CTRup = (stat.a1.df %>% filter(contrast == "P30.PBS.TDTvsCTR" & cluster == "P30.PBS.CTR"))$gene,
                     P30.PBS.TDTvsCTR.TDTup = (stat.a1.df %>% filter(contrast == "P30.PBS.TDTvsCTR" & cluster == "P30.PBS.TDT"))$gene,
                     P30.PBS.TDTvsMIG.MIGup = (stat.a1.df %>% filter(contrast == "P30.PBS.TDTvsMIG" & cluster == "P30.PBS.MIG"))$gene,
                     P30.PBS.TDTvsMIG.TDTup = (stat.a1.df %>% filter(contrast == "P30.PBS.TDTvsMIG" & cluster == "P30.PBS.TDT"))$gene)
stat.a2.list <- list(P06.TDTvsCTR.CTRup = (stat.a2.df %>% filter(contrast == "P06.TDTvsCTR" & cluster == "P06.CTR"))$gene,
                     P06.TDTvsCTR.TDTup = (stat.a2.df %>% filter(contrast == "P06.TDTvsCTR" & cluster == "P06.TDT"))$gene,
                     P06.TDTvsMIG.MIGup = (stat.a2.df %>% filter(contrast == "P06.TDTvsMIG" & cluster == "P06.MIG"))$gene,
                     P06.TDTvsMIG.TDTup = (stat.a2.df %>% filter(contrast == "P06.TDTvsMIG" & cluster == "P06.TDT"))$gene,
                     P30.PBS.TDTvsCTR.CTRup = (stat.a2.df %>% filter(contrast == "P30.PBS.TDTvsCTR" & cluster == "P30.PBS.CTR"))$gene,
                     P30.PBS.TDTvsCTR.TDTup = (stat.a2.df %>% filter(contrast == "P30.PBS.TDTvsCTR" & cluster == "P30.PBS.TDT"))$gene,
                     P30.PBS.TDTvsMIG.MIGup = (stat.a2.df %>% filter(contrast == "P30.PBS.TDTvsMIG" & cluster == "P30.PBS.MIG"))$gene,
                     P30.PBS.TDTvsMIG.TDTup = (stat.a2.df %>% filter(contrast == "P30.PBS.TDTvsMIG" & cluster == "P30.PBS.TDT"))$gene)
stat.a3.list <- list(P06.TDTvsCTR.CTRup = (stat.a3.df %>% filter(contrast == "P06.TDTvsCTR" & cluster == "P06.CTR"))$gene,
                     P06.TDTvsCTR.TDTup = (stat.a3.df %>% filter(contrast == "P06.TDTvsCTR" & cluster == "P06.TDT"))$gene,
                     P06.TDTvsMIG.MIGup = (stat.a3.df %>% filter(contrast == "P06.TDTvsMIG" & cluster == "P06.MIG"))$gene,
                     P06.TDTvsMIG.TDTup = (stat.a3.df %>% filter(contrast == "P06.TDTvsMIG" & cluster == "P06.TDT"))$gene,
                     P30.PBS.TDTvsCTR.CTRup = (stat.a3.df %>% filter(contrast == "P30.PBS.TDTvsCTR" & cluster == "P30.PBS.CTR"))$gene,
                     P30.PBS.TDTvsCTR.TDTup = (stat.a3.df %>% filter(contrast == "P30.PBS.TDTvsCTR" & cluster == "P30.PBS.TDT"))$gene,
                     P30.PBS.TDTvsMIG.MIGup = (stat.a3.df %>% filter(contrast == "P30.PBS.TDTvsMIG" & cluster == "P30.PBS.MIG"))$gene,
                     P30.PBS.TDTvsMIG.TDTup = (stat.a3.df %>% filter(contrast == "P30.PBS.TDTvsMIG" & cluster == "P30.PBS.TDT"))$gene)
lapply(stat.a1.list,length)
## $P06.TDTvsCTR.CTRup
## [1] 174
## 
## $P06.TDTvsCTR.TDTup
## [1] 84
## 
## $P06.TDTvsMIG.MIGup
## [1] 70
## 
## $P06.TDTvsMIG.TDTup
## [1] 61
## 
## $P30.PBS.TDTvsCTR.CTRup
## [1] 28
## 
## $P30.PBS.TDTvsCTR.TDTup
## [1] 35
## 
## $P30.PBS.TDTvsMIG.MIGup
## [1] 21
## 
## $P30.PBS.TDTvsMIG.TDTup
## [1] 5
lapply(stat.a2.list,length)
## $P06.TDTvsCTR.CTRup
## [1] 62
## 
## $P06.TDTvsCTR.TDTup
## [1] 52
## 
## $P06.TDTvsMIG.MIGup
## [1] 11
## 
## $P06.TDTvsMIG.TDTup
## [1] 31
## 
## $P30.PBS.TDTvsCTR.CTRup
## [1] 8
## 
## $P30.PBS.TDTvsCTR.TDTup
## [1] 17
## 
## $P30.PBS.TDTvsMIG.MIGup
## [1] 9
## 
## $P30.PBS.TDTvsMIG.TDTup
## [1] 4
lapply(stat.a3.list,length)
## $P06.TDTvsCTR.CTRup
## [1] 14
## 
## $P06.TDTvsCTR.TDTup
## [1] 18
## 
## $P06.TDTvsMIG.MIGup
## [1] 3
## 
## $P06.TDTvsMIG.TDTup
## [1] 14
## 
## $P30.PBS.TDTvsCTR.CTRup
## [1] 2
## 
## $P30.PBS.TDTvsCTR.TDTup
## [1] 7
## 
## $P30.PBS.TDTvsMIG.MIGup
## [1] 3
## 
## $P30.PBS.TDTvsMIG.TDTup
## [1] 1

plot8

DEG volcano, set same x/y-axis

pp.DEGs.a$P06.TDTvsCTR <- DEGs.a.combine$P06.TDTvsCTR %>%
  mutate(label=ifelse(((p_val_adj < 1e-8 & avg_log2FC<0) | (p_val_adj < 1e-8 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTR",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTR"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P06, TDT vs CTR") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") + xlim(c(-1,1.75)) + ylim(c(0,60))
pp.DEGs.a$P06.TDTvsCTR

pp.DEGs.a$P06.TDTvsMIG <- DEGs.a.combine$P06.TDTvsMIG %>%
  mutate(label=ifelse(((p_val_adj < 1e-5 & avg_log2FC<0) | (p_val_adj < 1e-5 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"MIG",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("MIG"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P06, TDT vs MIG") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") + xlim(c(-1,1.75)) + ylim(c(0,60))
pp.DEGs.a$P06.TDTvsMIG

pp.DEGs.a$P30.PBS.TDTvsCTR <- DEGs.a.combine$P30.PBS.TDTvsCTR %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTR",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTR"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P30.PBS, TDT vs CTR") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") + xlim(c(-1,1.75)) + ylim(c(0,60))
pp.DEGs.a$P30.PBS.TDTvsCTR

pp.DEGs.a$P30.PBS.TDTvsMIG <- DEGs.a.combine$P30.PBS.TDTvsMIG %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"MIG",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("MIG"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P30.PBS, TDT vs MIG") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")  + xlim(c(-1,1.75)) + ylim(c(0,60))
pp.DEGs.a$P30.PBS.TDTvsMIG

#saveRDS(GEX.seur,"./Spp1tdt.subset1_P06_P30PBS.new1008.rds")
#GEX.seur <- readRDS("./Spp1tdt.subset1_P06_P30PBS.new1008.rds")

newlist1009

dotplot and violin in conditions

GEX.seur
## An object of class Seurat 
## 17900 features across 12877 samples within 1 assay 
## Active assay: RNA (17900 features, 1090 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
marker1009 <- as.vector(unlist(read.table("./dotplot.subset1_newlist1009.txt")))
marker1009
##  [1] "Apoe"    "Ms4a6c"  "Ms4a6d"  "Ccl6"    "Tspo"    "Fn1"     "C4b"    
##  [8] "Spp1"    "Ms4a6b"  "Ccl12"   "Apoc1"   "Apoc2"   "Gpx3"    "Cd63"   
## [15] "Axl"     "Lpl"     "Fcrls"   "Gpr34"   "Siglech" "P2ry12"  "P2ry13" 
## [22] "Tmem119" "Sall1"   "Slc2a5"  "Cst3"
length(marker1009)
## [1] 25
sum(marker1009 %in% markers.new$gene)
## [1] 24
marker1009.sort <- (markers.new %>% group_by(cluster) %>% 
                  filter(gene %in% marker1009) %>%
                   ungroup() %>%
    #arrange(desc(avg_log2FC*pct.1),gene) %>%
    arrange(desc(avg_log2FC),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
length(marker1009)
## [1] 25
length(marker1009.sort)
## [1] 24
setdiff(marker1009,marker1009.sort)
## [1] "Fcrls"
marker1009.new <- c(marker1009.sort[1:15],"Fcrls",marker1009.sort[16:24])
pp.dot.x1a <- DotPlot(GEX.seur, features = marker1009, group.by = "new_clusters"
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters")
pp.dot.x1a

pp.dot.x1a <- DotPlot(GEX.seur, features = marker1009.new, group.by = "new_clusters",cols = c("lightgrey","red")
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Clusters")
pp.dot.x1a

pp.dot.x1a  + scale_color_gradientn(colours  = rev(mapal))

pp.dot.x1b <- DotPlot(GEX.seur, features = marker1009.new, group.by = "cnt",cols = c("lightgrey","red")
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Conditions")
pp.dot.x1b

pp.dot.x1b  + scale_color_gradientn(colours  = rev(mapal))

pp.violin.x1b <- VlnPlot(GEX.seur, features = marker1009.new, group.by = "cnt", cols = color.cnt,
                      ncol = 5, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55)
pp.violin.x1b

pp.violin.x1c <- VlnPlot(GEX.seur, features = marker1009.new, group.by = "cnt", cols = color.cnt,
                      ncol = 5, pt.size = 0, raster = F) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55) & ylim(c(0,7.2)) &
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("P06.CTR","P06.MIG"),
                                                  c("P06.MIG","P06.TDT"),
                                                  c("P06.CTR","P06.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.MIG"),
                                                  c("P30.PBS.MIG","P30.PBS.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.TDT"),
                                                  c("P06.TDT","P30.PBS.TDT")),
                               label.y = c(6.3,6.8,7.3,6.3,6.8,7.3,7.8)-1,
                               size=1.75
                               )
pp.violin.x1c

forGEO

GEX.seur <- readRDS("./Spp1tdt.subset1_P06_P30PBS.new1008.rds")
GEX.seur
## An object of class Seurat 
## 17900 features across 12877 samples within 1 assay 
## Active assay: RNA (17900 features, 1090 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
GEX.seur@meta.data[,grep("snn|pANN",colnames(GEX.seur@meta.data),value = T)] <- NULL
head(GEX.seur@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCAAGTCGGCCT-1    Spp1tdt       5529         2163   5.498282  17.887502
## AAACCCAAGTCTGCGC-1    Spp1tdt       8466         2389   3.874321  26.836759
## AAACCCACAAACTCGT-1    Spp1tdt       4188         1841   2.960840   8.572111
## AAACCCACAGTAGAAT-1    Spp1tdt       6855         2585   2.727936  14.529540
## AAACCCACATCCGTGG-1    Spp1tdt       2337         1225   3.979461   7.659392
## AAACCCAGTTACGCCG-1    Spp1tdt       2915         1382   1.234991  13.687822
##                        FB.info     S.Score    G2M.Score Phase seurat_clusters
## AAACCCAAGTCGGCCT-1     P06.TDT -0.04932171 -0.114069796    G1               5
## AAACCCAAGTCTGCGC-1     P06.TDT -0.02781008 -0.083520479    G1               5
## AAACCCACAAACTCGT-1 P30.PBS.MIG  0.01921373  0.060314405   G2M               1
## AAACCCACAGTAGAAT-1     P06.CTR  0.45758583  0.005436139     S               4
## AAACCCACATCCGTGG-1 P30.PBS.TDT  0.04897564 -0.042116708     S               1
## AAACCCAGTTACGCCG-1 P30.PBS.TDT -0.01777409 -0.043560404    G1               0
##                    DoubletFinder0.05 DoubletFinder0.1 sort_clusters     age
## AAACCCAAGTCGGCCT-1           Singlet          Singlet             5     P06
## AAACCCAAGTCTGCGC-1           Singlet          Singlet             5     P06
## AAACCCACAAACTCGT-1           Singlet          Singlet             1 P30.PBS
## AAACCCACAGTAGAAT-1           Singlet          Doublet             4     P06
## AAACCCACATCCGTGG-1           Singlet          Singlet             1 P30.PBS
## AAACCCAGTTACGCCG-1           Singlet          Singlet             0 P30.PBS
##                            cnt   preAnno DAM.sig_top50 DAM.sig_top100
## AAACCCAAGTCGGCCT-1     P06.TDT Microglia    0.15167590   -0.017812540
## AAACCCAAGTCTGCGC-1     P06.TDT Microglia    0.39501325    0.269974027
## AAACCCACAAACTCGT-1 P30.PBS.MIG Microglia    0.06770196    0.077388291
## AAACCCACAGTAGAAT-1     P06.CTR Microglia   -0.16981972   -0.108029609
## AAACCCACATCCGTGG-1 P30.PBS.TDT Microglia    0.14346077    0.008080233
## AAACCCAGTTACGCCG-1 P30.PBS.TDT Microglia   -0.10833692   -0.041742952
##                    DAM.sig_top250 DAM.sig_top500 new_clusters    age1
## AAACCCAAGTCGGCCT-1     0.02914850     0.20464023           C3     P06
## AAACCCAAGTCTGCGC-1     0.23830051     0.38608163           C3     P06
## AAACCCACAAACTCGT-1     0.03449641     0.16261439          C10 P30.PBS
## AAACCCACAGTAGAAT-1    -0.06343684     0.07254075           C1     P06
## AAACCCACATCCGTGG-1    -0.01448075     0.11958677          C10 P30.PBS
## AAACCCAGTTACGCCG-1    -0.01268726     0.17698730           C9 P30.PBS
##                         FB.new     cluster_cnt
## AAACCCAAGTCGGCCT-1     P06.TDT      C3_P06.TDT
## AAACCCAAGTCTGCGC-1     P06.TDT      C3_P06.TDT
## AAACCCACAAACTCGT-1 P30.PBS.MIG C10_P30.PBS.MIG
## AAACCCACAGTAGAAT-1     P06.CTR      C1_P06.CTR
## AAACCCACATCCGTGG-1 P30.PBS.TDT C10_P30.PBS.TDT
## AAACCCAGTTACGCCG-1 P30.PBS.TDT  C9_P30.PBS.TDT
#saveRDS(GEX.seur,"I:/Shared_win/projects/202310_Spp1DAM/forGEO/seur_obj/Spp1TDT_P06P30.final.seur_obj.rds")